1.Objetivo do desafio

O objetivo do desafio House prices é prever o valor de venda de um imóvel de acordo com as 79 variáveis do banco de dados. O desafio desponibiliza 4 arquivos, um dataset de treino e um dataset de teste ambos em csv, um arquivo txt com a descrição das colunas do dataset treino e um arquivo csv exemplo de como o resultado deve ser submetido. Para acessar mais informações sobre o desafio e fazer download dos dados basta acessar o link.

2. Lendo os arquivos.

# Carregando o tidyverse

library(tidyverse)
── Attaching core tidyverse packages ──────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.0     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.1     ✔ tibble    3.1.8
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     ── Conflicts ────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
# Setando o diretório de trabalho
setwd(dir = "C:/Users/rodolfo.paula/Desktop/PESSOAL_RODOLFO/Better Decisions/scripts/kaggle/house_prices")

# Lendo os arquivos de treino e de teste
train = data.frame(read.csv("train.csv"))
test = data.frame(read.csv("test.csv"))

# Visualizando as dimensões dos arquivos
dim(train)
[1] 1460   81
dim(test)
[1] 1459   80
# Visualizando o dataset de treino 
glimpse(train)
Rows: 1,460
Columns: 81
$ Id            <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
$ MSSubClass    <int> 60, 20, 60, 70, 60, 50, 20, 60, 50, 190, 20, 60,…
$ MSZoning      <chr> "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", …
$ LotFrontage   <int> 65, 80, 68, 60, 84, 85, 75, NA, 51, 50, 70, 85, …
$ LotArea       <int> 8450, 9600, 11250, 9550, 14260, 14115, 10084, 10…
$ Street        <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", …
$ Alley         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ LotShape      <chr> "Reg", "Reg", "IR1", "IR1", "IR1", "IR1", "Reg",…
$ LandContour   <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl",…
$ Utilities     <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub"…
$ LotConfig     <chr> "Inside", "FR2", "Inside", "Corner", "FR2", "Ins…
$ LandSlope     <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl",…
$ Neighborhood  <chr> "CollgCr", "Veenker", "CollgCr", "Crawfor", "NoR…
$ Condition1    <chr> "Norm", "Feedr", "Norm", "Norm", "Norm", "Norm",…
$ Condition2    <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", …
$ BldgType      <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", …
$ HouseStyle    <chr> "2Story", "1Story", "2Story", "2Story", "2Story"…
$ OverallQual   <int> 7, 6, 7, 7, 8, 5, 8, 7, 7, 5, 5, 9, 5, 7, 6, 7, …
$ OverallCond   <int> 5, 8, 5, 5, 5, 5, 5, 6, 5, 6, 5, 5, 6, 5, 5, 8, …
$ YearBuilt     <int> 2003, 1976, 2001, 1915, 2000, 1993, 2004, 1973, …
$ YearRemodAdd  <int> 2003, 1976, 2002, 1970, 2000, 1995, 2005, 1973, …
$ RoofStyle     <chr> "Gable", "Gable", "Gable", "Gable", "Gable", "Ga…
$ RoofMatl      <chr> "CompShg", "CompShg", "CompShg", "CompShg", "Com…
$ Exterior1st   <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Sdng", "Vin…
$ Exterior2nd   <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Shng", "Vin…
$ MasVnrType    <chr> "BrkFace", "None", "BrkFace", "None", "BrkFace",…
$ MasVnrArea    <int> 196, 0, 162, 0, 350, 0, 186, 240, 0, 0, 0, 286, …
$ ExterQual     <chr> "Gd", "TA", "Gd", "TA", "Gd", "TA", "Gd", "TA", …
$ ExterCond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ Foundation    <chr> "PConc", "CBlock", "PConc", "BrkTil", "PConc", "…
$ BsmtQual      <chr> "Gd", "Gd", "Gd", "TA", "Gd", "Gd", "Ex", "Gd", …
$ BsmtCond      <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "TA", "TA", …
$ BsmtExposure  <chr> "No", "Gd", "Mn", "No", "Av", "No", "Av", "Mn", …
$ BsmtFinType1  <chr> "GLQ", "ALQ", "GLQ", "ALQ", "GLQ", "GLQ", "GLQ",…
$ BsmtFinSF1    <int> 706, 978, 486, 216, 655, 732, 1369, 859, 0, 851,…
$ BsmtFinType2  <chr> "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf",…
$ BsmtFinSF2    <int> 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0,…
$ BsmtUnfSF     <int> 150, 284, 434, 540, 490, 64, 317, 216, 952, 140,…
$ TotalBsmtSF   <int> 856, 1262, 920, 756, 1145, 796, 1686, 1107, 952,…
$ Heating       <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", …
$ HeatingQC     <chr> "Ex", "Ex", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", …
$ CentralAir    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ Electrical    <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SB…
$ X1stFlrSF     <int> 856, 1262, 920, 961, 1145, 796, 1694, 1107, 1022…
$ X2ndFlrSF     <int> 854, 0, 866, 756, 1053, 566, 0, 983, 752, 0, 0, …
$ LowQualFinSF  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ GrLivArea     <int> 1710, 1262, 1786, 1717, 2198, 1362, 1694, 2090, …
$ BsmtFullBath  <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, …
$ BsmtHalfBath  <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ FullBath      <int> 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 3, 1, 2, 1, 1, …
$ HalfBath      <int> 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, …
$ BedroomAbvGr  <int> 3, 3, 3, 3, 4, 1, 3, 3, 2, 2, 3, 4, 2, 3, 2, 2, …
$ KitchenAbvGr  <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, …
$ KitchenQual   <chr> "Gd", "TA", "Gd", "Gd", "Gd", "TA", "Gd", "TA", …
$ TotRmsAbvGrd  <int> 8, 6, 6, 7, 9, 5, 7, 7, 8, 5, 5, 11, 4, 7, 5, 5,…
$ Functional    <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ",…
$ Fireplaces    <int> 0, 1, 1, 1, 1, 0, 1, 2, 2, 2, 0, 2, 0, 1, 1, 0, …
$ FireplaceQu   <chr> NA, "TA", "TA", "Gd", "TA", NA, "Gd", "TA", "TA"…
$ GarageType    <chr> "Attchd", "Attchd", "Attchd", "Detchd", "Attchd"…
$ GarageYrBlt   <int> 2003, 1976, 2001, 1998, 2000, 1993, 2004, 1973, …
$ GarageFinish  <chr> "RFn", "RFn", "RFn", "Unf", "RFn", "Unf", "RFn",…
$ GarageCars    <int> 2, 2, 2, 3, 3, 2, 2, 2, 2, 1, 1, 3, 1, 3, 1, 2, …
$ GarageArea    <int> 548, 460, 608, 642, 836, 480, 636, 484, 468, 205…
$ GarageQual    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ GarageCond    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ PavedDrive    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ WoodDeckSF    <int> 0, 298, 0, 0, 192, 40, 255, 235, 90, 0, 0, 147, …
$ OpenPorchSF   <int> 61, 0, 42, 35, 84, 30, 57, 204, 0, 4, 0, 21, 0, …
$ EnclosedPorch <int> 0, 0, 0, 272, 0, 0, 0, 228, 205, 0, 0, 0, 0, 0, …
$ X3SsnPorch    <int> 0, 0, 0, 0, 0, 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ ScreenPorch   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0…
$ PoolArea      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ PoolQC        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Fence         <chr> NA, NA, NA, NA, NA, "MnPrv", NA, NA, NA, NA, NA,…
$ MiscFeature   <chr> NA, NA, NA, NA, NA, "Shed", NA, "Shed", NA, NA, …
$ MiscVal       <int> 0, 0, 0, 0, 0, 700, 0, 350, 0, 0, 0, 0, 0, 0, 0,…
$ MoSold        <int> 2, 5, 9, 2, 12, 10, 8, 11, 4, 1, 2, 7, 9, 8, 5, …
$ YrSold        <int> 2008, 2007, 2008, 2006, 2008, 2009, 2007, 2009, …
$ SaleType      <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", …
$ SaleCondition <chr> "Normal", "Normal", "Normal", "Abnorml", "Normal…
$ SalePrice     <int> 208500, 181500, 223500, 140000, 250000, 143000, …
# Obtendo um sumário dos dados de treino

summary(train)
       Id           MSSubClass      MSZoning          LotFrontage    
 Min.   :   1.0   Min.   : 20.0   Length:1460        Min.   : 21.00  
 1st Qu.: 365.8   1st Qu.: 20.0   Class :character   1st Qu.: 59.00  
 Median : 730.5   Median : 50.0   Mode  :character   Median : 69.00  
 Mean   : 730.5   Mean   : 56.9                      Mean   : 70.05  
 3rd Qu.:1095.2   3rd Qu.: 70.0                      3rd Qu.: 80.00  
 Max.   :1460.0   Max.   :190.0                      Max.   :313.00  
                                                     NA's   :259     
    LotArea          Street             Alley          
 Min.   :  1300   Length:1460        Length:1460       
 1st Qu.:  7554   Class :character   Class :character  
 Median :  9478   Mode  :character   Mode  :character  
 Mean   : 10517                                        
 3rd Qu.: 11602                                        
 Max.   :215245                                        
                                                       
   LotShape         LandContour         Utilities        
 Length:1460        Length:1460        Length:1460       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
                                                         
  LotConfig          LandSlope         Neighborhood      
 Length:1460        Length:1460        Length:1460       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
                                                         
  Condition1         Condition2          BldgType        
 Length:1460        Length:1460        Length:1460       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
                                                         
  HouseStyle         OverallQual      OverallCond      YearBuilt   
 Length:1460        Min.   : 1.000   Min.   :1.000   Min.   :1872  
 Class :character   1st Qu.: 5.000   1st Qu.:5.000   1st Qu.:1954  
 Mode  :character   Median : 6.000   Median :5.000   Median :1973  
                    Mean   : 6.099   Mean   :5.575   Mean   :1971  
                    3rd Qu.: 7.000   3rd Qu.:6.000   3rd Qu.:2000  
                    Max.   :10.000   Max.   :9.000   Max.   :2010  
                                                                   
  YearRemodAdd   RoofStyle           RoofMatl        
 Min.   :1950   Length:1460        Length:1460       
 1st Qu.:1967   Class :character   Class :character  
 Median :1994   Mode  :character   Mode  :character  
 Mean   :1985                                        
 3rd Qu.:2004                                        
 Max.   :2010                                        
                                                     
 Exterior1st        Exterior2nd         MasVnrType       
 Length:1460        Length:1460        Length:1460       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
                                                         
   MasVnrArea      ExterQual          ExterCond        
 Min.   :   0.0   Length:1460        Length:1460       
 1st Qu.:   0.0   Class :character   Class :character  
 Median :   0.0   Mode  :character   Mode  :character  
 Mean   : 103.7                                        
 3rd Qu.: 166.0                                        
 Max.   :1600.0                                        
 NA's   :8                                             
  Foundation          BsmtQual           BsmtCond        
 Length:1460        Length:1460        Length:1460       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
                                                         
 BsmtExposure       BsmtFinType1         BsmtFinSF1    
 Length:1460        Length:1460        Min.   :   0.0  
 Class :character   Class :character   1st Qu.:   0.0  
 Mode  :character   Mode  :character   Median : 383.5  
                                       Mean   : 443.6  
                                       3rd Qu.: 712.2  
                                       Max.   :5644.0  
                                                       
 BsmtFinType2         BsmtFinSF2        BsmtUnfSF       TotalBsmtSF    
 Length:1460        Min.   :   0.00   Min.   :   0.0   Min.   :   0.0  
 Class :character   1st Qu.:   0.00   1st Qu.: 223.0   1st Qu.: 795.8  
 Mode  :character   Median :   0.00   Median : 477.5   Median : 991.5  
                    Mean   :  46.55   Mean   : 567.2   Mean   :1057.4  
                    3rd Qu.:   0.00   3rd Qu.: 808.0   3rd Qu.:1298.2  
                    Max.   :1474.00   Max.   :2336.0   Max.   :6110.0  
                                                                       
   Heating           HeatingQC          CentralAir       
 Length:1460        Length:1460        Length:1460       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
                                                         
  Electrical          X1stFlrSF      X2ndFlrSF     LowQualFinSF    
 Length:1460        Min.   : 334   Min.   :   0   Min.   :  0.000  
 Class :character   1st Qu.: 882   1st Qu.:   0   1st Qu.:  0.000  
 Mode  :character   Median :1087   Median :   0   Median :  0.000  
                    Mean   :1163   Mean   : 347   Mean   :  5.845  
                    3rd Qu.:1391   3rd Qu.: 728   3rd Qu.:  0.000  
                    Max.   :4692   Max.   :2065   Max.   :572.000  
                                                                   
   GrLivArea     BsmtFullBath     BsmtHalfBath        FullBath    
 Min.   : 334   Min.   :0.0000   Min.   :0.00000   Min.   :0.000  
 1st Qu.:1130   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:1.000  
 Median :1464   Median :0.0000   Median :0.00000   Median :2.000  
 Mean   :1515   Mean   :0.4253   Mean   :0.05753   Mean   :1.565  
 3rd Qu.:1777   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:2.000  
 Max.   :5642   Max.   :3.0000   Max.   :2.00000   Max.   :3.000  
                                                                  
    HalfBath       BedroomAbvGr    KitchenAbvGr   KitchenQual       
 Min.   :0.0000   Min.   :0.000   Min.   :0.000   Length:1460       
 1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:1.000   Class :character  
 Median :0.0000   Median :3.000   Median :1.000   Mode  :character  
 Mean   :0.3829   Mean   :2.866   Mean   :1.047                     
 3rd Qu.:1.0000   3rd Qu.:3.000   3rd Qu.:1.000                     
 Max.   :2.0000   Max.   :8.000   Max.   :3.000                     
                                                                    
  TotRmsAbvGrd     Functional          Fireplaces    FireplaceQu       
 Min.   : 2.000   Length:1460        Min.   :0.000   Length:1460       
 1st Qu.: 5.000   Class :character   1st Qu.:0.000   Class :character  
 Median : 6.000   Mode  :character   Median :1.000   Mode  :character  
 Mean   : 6.518                      Mean   :0.613                     
 3rd Qu.: 7.000                      3rd Qu.:1.000                     
 Max.   :14.000                      Max.   :3.000                     
                                                                       
  GarageType         GarageYrBlt   GarageFinish         GarageCars   
 Length:1460        Min.   :1900   Length:1460        Min.   :0.000  
 Class :character   1st Qu.:1961   Class :character   1st Qu.:1.000  
 Mode  :character   Median :1980   Mode  :character   Median :2.000  
                    Mean   :1979                      Mean   :1.767  
                    3rd Qu.:2002                      3rd Qu.:2.000  
                    Max.   :2010                      Max.   :4.000  
                    NA's   :81                                       
   GarageArea      GarageQual         GarageCond       
 Min.   :   0.0   Length:1460        Length:1460       
 1st Qu.: 334.5   Class :character   Class :character  
 Median : 480.0   Mode  :character   Mode  :character  
 Mean   : 473.0                                        
 3rd Qu.: 576.0                                        
 Max.   :1418.0                                        
                                                       
  PavedDrive          WoodDeckSF      OpenPorchSF     EnclosedPorch   
 Length:1460        Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
 Class :character   1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.:  0.00  
 Mode  :character   Median :  0.00   Median : 25.00   Median :  0.00  
                    Mean   : 94.24   Mean   : 46.66   Mean   : 21.95  
                    3rd Qu.:168.00   3rd Qu.: 68.00   3rd Qu.:  0.00  
                    Max.   :857.00   Max.   :547.00   Max.   :552.00  
                                                                      
   X3SsnPorch      ScreenPorch        PoolArea          PoolQC         
 Min.   :  0.00   Min.   :  0.00   Min.   :  0.000   Length:1460       
 1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.:  0.000   Class :character  
 Median :  0.00   Median :  0.00   Median :  0.000   Mode  :character  
 Mean   :  3.41   Mean   : 15.06   Mean   :  2.759                     
 3rd Qu.:  0.00   3rd Qu.:  0.00   3rd Qu.:  0.000                     
 Max.   :508.00   Max.   :480.00   Max.   :738.000                     
                                                                       
    Fence           MiscFeature           MiscVal        
 Length:1460        Length:1460        Min.   :    0.00  
 Class :character   Class :character   1st Qu.:    0.00  
 Mode  :character   Mode  :character   Median :    0.00  
                                       Mean   :   43.49  
                                       3rd Qu.:    0.00  
                                       Max.   :15500.00  
                                                         
     MoSold           YrSold       SaleType         SaleCondition     
 Min.   : 1.000   Min.   :2006   Length:1460        Length:1460       
 1st Qu.: 5.000   1st Qu.:2007   Class :character   Class :character  
 Median : 6.000   Median :2008   Mode  :character   Mode  :character  
 Mean   : 6.322   Mean   :2008                                        
 3rd Qu.: 8.000   3rd Qu.:2009                                        
 Max.   :12.000   Max.   :2010                                        
                                                                      
   SalePrice     
 Min.   : 34900  
 1st Qu.:129975  
 Median :163000  
 Mean   :180921  
 3rd Qu.:214000  
 Max.   :755000  
                 

Utilizando o sumário fica claro que o banco de dados de treino possui muitas variáveis. Vamos incialmente separar o banco de dados em variáveis qualitativas e variáveis quantitativas, assim se torna mais facil o tratamento e limpeza dos dados.

Para as variáveis qualitativas que apresentem valores faltantes ou NA`s vamos substituir pela moda daquela variável e para as variáveis quantitativas de que apresentem valores faltantes vamos substituir pela média.

A coluna Id será eliminada por ser um valor sequencial que nao representa nehuma informação útil como variável explicativa capaz de influenciar no valor predito do preço de venda de uma casa.

Para separar o dataset em dados quantitativos e qualitativos o melhor caminho é usara a função select_if() do pacote dplyr


# Separando o dataframe em train quanti e train quali usando a função select_if

train_quanti <- select_if(train, is.numeric)
train_quali <- select_if(train, is.character)

3. Limpesa de dados Qualitativos

#Visualizando os dados 
glimpse(train_quali)
Rows: 1,460
Columns: 43
$ MSZoning      <chr> "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", …
$ Street        <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", …
$ Alley         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ LotShape      <chr> "Reg", "Reg", "IR1", "IR1", "IR1", "IR1", "Reg",…
$ LandContour   <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl",…
$ Utilities     <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub"…
$ LotConfig     <chr> "Inside", "FR2", "Inside", "Corner", "FR2", "Ins…
$ LandSlope     <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl",…
$ Neighborhood  <chr> "CollgCr", "Veenker", "CollgCr", "Crawfor", "NoR…
$ Condition1    <chr> "Norm", "Feedr", "Norm", "Norm", "Norm", "Norm",…
$ Condition2    <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", …
$ BldgType      <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", …
$ HouseStyle    <chr> "2Story", "1Story", "2Story", "2Story", "2Story"…
$ RoofStyle     <chr> "Gable", "Gable", "Gable", "Gable", "Gable", "Ga…
$ RoofMatl      <chr> "CompShg", "CompShg", "CompShg", "CompShg", "Com…
$ Exterior1st   <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Sdng", "Vin…
$ Exterior2nd   <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Shng", "Vin…
$ MasVnrType    <chr> "BrkFace", "None", "BrkFace", "None", "BrkFace",…
$ ExterQual     <chr> "Gd", "TA", "Gd", "TA", "Gd", "TA", "Gd", "TA", …
$ ExterCond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ Foundation    <chr> "PConc", "CBlock", "PConc", "BrkTil", "PConc", "…
$ BsmtQual      <chr> "Gd", "Gd", "Gd", "TA", "Gd", "Gd", "Ex", "Gd", …
$ BsmtCond      <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "TA", "TA", …
$ BsmtExposure  <chr> "No", "Gd", "Mn", "No", "Av", "No", "Av", "Mn", …
$ BsmtFinType1  <chr> "GLQ", "ALQ", "GLQ", "ALQ", "GLQ", "GLQ", "GLQ",…
$ BsmtFinType2  <chr> "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf",…
$ Heating       <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", …
$ HeatingQC     <chr> "Ex", "Ex", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", …
$ CentralAir    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ Electrical    <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SB…
$ KitchenQual   <chr> "Gd", "TA", "Gd", "Gd", "Gd", "TA", "Gd", "TA", …
$ Functional    <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ",…
$ FireplaceQu   <chr> NA, "TA", "TA", "Gd", "TA", NA, "Gd", "TA", "TA"…
$ GarageType    <chr> "Attchd", "Attchd", "Attchd", "Detchd", "Attchd"…
$ GarageFinish  <chr> "RFn", "RFn", "RFn", "Unf", "RFn", "Unf", "RFn",…
$ GarageQual    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ GarageCond    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ PavedDrive    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ PoolQC        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Fence         <chr> NA, NA, NA, NA, NA, "MnPrv", NA, NA, NA, NA, NA,…
$ MiscFeature   <chr> NA, NA, NA, NA, NA, "Shed", NA, "Shed", NA, NA, …
$ SaleType      <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", …
$ SaleCondition <chr> "Normal", "Normal", "Normal", "Abnorml", "Normal…
# Procurando Valores missing
colSums(is.na(train_quali))
     MSZoning        Street         Alley      LotShape   LandContour 
            0             0          1369             0             0 
    Utilities     LotConfig     LandSlope  Neighborhood    Condition1 
            0             0             0             0             0 
   Condition2      BldgType    HouseStyle     RoofStyle      RoofMatl 
            0             0             0             0             0 
  Exterior1st   Exterior2nd    MasVnrType     ExterQual     ExterCond 
            0             0             8             0             0 
   Foundation      BsmtQual      BsmtCond  BsmtExposure  BsmtFinType1 
            0            37            37            38            37 
 BsmtFinType2       Heating     HeatingQC    CentralAir    Electrical 
           38             0             0             0             1 
  KitchenQual    Functional   FireplaceQu    GarageType  GarageFinish 
            0             0           690            81            81 
   GarageQual    GarageCond    PavedDrive        PoolQC         Fence 
           81            81             0          1453          1179 
  MiscFeature      SaleType SaleCondition 
         1406             0             0 

Com essa analise a cima sabemos o seguinte sobre os nossos dados categóricos:


# Deletando as colunas que não são de interesse para o dataset. 

train_quali <-train_quali %>% select(-c(Alley, PoolQC, Fence, MiscFeature))
print(dim(train_quali))
[1] 1460   39
# Lista com as variáveis : MasVnrType,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Electrical,
# FireplaceQu,GarageType,GarageFinish,GarageQual,GarageCond
##############################################################
# Contando os valores NA`s das variáveis com missing values
unique(train_quali$MasVnrType)
[1] "BrkFace" "None"    "Stone"   "BrkCmn"  NA       
sort(table(train_quali$MasVnrType))

 BrkCmn   Stone BrkFace    None 
     15     128     445     864 
# Alterando os valores de NA para o valor da Moda de MasVnrType.
train_quali <- train_quali %>% mutate(MasVnrType=coalesce(MasVnrType,"None"))

##############################################
# Contando os valores NA`s das variáveis com missing values
unique(train_quali$BsmtQual)
[1] "Gd" "TA" "Ex" NA   "Fa"
sort(table(train_quali$BsmtQual))

 Fa  Ex  Gd  TA 
 35 121 618 649 
# Alterando os valores de NA para o valor da Moda de BsmtQual.
train_quali <- train_quali %>%    
  mutate(BsmtQual=coalesce(BsmtQual,"TA"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$BsmtCond)
[1] "TA" "Gd" NA   "Fa" "Po"
sort(table(train_quali$BsmtCond))

  Po   Fa   Gd   TA 
   2   45   65 1311 
# Alterando os valores de NA para o valor da Moda de BsmtCond
train_quali <- train_quali %>%    
  mutate(BsmtCond=coalesce(BsmtCond,"TA"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$BsmtExposure)
[1] "No" "Gd" "Mn" "Av" NA  
sort(table(train_quali$BsmtExposure))

 Mn  Gd  Av  No 
114 134 221 953 
# Alterando os valores de NA para o valor da Moda de BsmtExposure
train_quali <- train_quali %>%    
  mutate(BsmtExposure=coalesce(BsmtExposure,"No"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$BsmtFinType1)
[1] "GLQ" "ALQ" "Unf" "Rec" "BLQ" NA    "LwQ"
sort(table(train_quali$BsmtFinType1))

LwQ Rec BLQ ALQ GLQ Unf 
 74 133 148 220 418 430 
# Alterando os valores de NA para o valor da Moda de BsmtFinType1
train_quali <- train_quali %>%    
  mutate(BsmtFinType1=coalesce(BsmtFinType1,"Unf"))
##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$BsmtFinType2)
[1] "Unf" "BLQ" NA    "ALQ" "Rec" "LwQ" "GLQ"
sort(table(train_quali$BsmtFinType2))

 GLQ  ALQ  BLQ  LwQ  Rec  Unf 
  14   19   33   46   54 1256 
# Alterando os valores de NA para o valor da Moda de BsmtFinType2
train_quali <- train_quali %>%    
  mutate(BsmtFinType2=coalesce(BsmtFinType2,"Unf"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$Electrical)
[1] "SBrkr" "FuseF" "FuseA" "FuseP" "Mix"   NA     
sort(table(train_quali$Electrical))

  Mix FuseP FuseF FuseA SBrkr 
    1     3    27    94  1334 
# Alterando os valores de NA para o valor da Moda de Electrical
train_quali <- train_quali %>%    
  mutate(Electrical=coalesce(Electrical,"SBrkr"))


##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$FireplaceQu)
[1] NA   "TA" "Gd" "Fa" "Ex" "Po"
sort(table(train_quali$FireplaceQu))

 Po  Ex  Fa  TA  Gd 
 20  24  33 313 380 
# Alterando os valores de NA para o valor da Moda de FireplaceQu
train_quali <- train_quali %>%    
  mutate(FireplaceQu=coalesce(FireplaceQu,"Gd"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$GarageType)
[1] "Attchd"  "Detchd"  "BuiltIn" "CarPort" NA        "Basment"
[7] "2Types" 
sort(table(train_quali$GarageType))

 2Types CarPort Basment BuiltIn  Detchd  Attchd 
      6       9      19      88     387     870 
# Alterando os valores de NA para o valor da Moda de GarageType
train_quali <- train_quali %>%    
  mutate(GarageType=coalesce(GarageType,"Attchd"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$GarageFinish)
[1] "RFn" "Unf" "Fin" NA   
sort(table(train_quali$GarageFinish))

Fin RFn Unf 
352 422 605 
# Alterando os valores de NA para o valor da Moda de GarageFinish
train_quali <- train_quali %>%    
  mutate(GarageFinish=coalesce(GarageFinish,"Unf"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$GarageQual)
[1] "TA" "Fa" "Gd" NA   "Ex" "Po"
sort(table(train_quali$GarageQual))

  Ex   Po   Gd   Fa   TA 
   3    3   14   48 1311 
# Alterando os valores de NA para o valor da Moda de GarageQual
train_quali <- train_quali %>%    
  mutate(GarageQual=coalesce(GarageQual,"TA"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$GarageCond)
[1] "TA" "Fa" NA   "Gd" "Po" "Ex"
sort(table(train_quali$GarageCond))

  Ex   Po   Gd   Fa   TA 
   2    7    9   35 1326 
# Alterando os valores de NA para o valor da Moda de GarageCond
train_quali <- train_quali %>%    
  mutate(GarageCond=coalesce(GarageCond,"TA"))

No entanto, alterar na mão cada variável não é a solução mais inteligente, imagine um caso de um dataset com centenas ou até milhares de variáveis para serem alteradas.

O melhor a ser feito é loop do tipo for, capaz de varrer todas as variáveis, obter o valor de frequencia para cada valor e armazenar este com a moda e em seguida substituir os missing values de cada variável.

Vamos então carregar o banco de dados novamente para anular as transformações realizadas e separar em dados quantitativos e qualitativos e em seguida somar os valores NA`s das variáveis.


# Somando os valores NA`s para cada coluna
colSums(is.na(train_quali))
     MSZoning        Street      LotShape   LandContour     Utilities 
            0             0             0             0             0 
    LotConfig     LandSlope  Neighborhood    Condition1    Condition2 
            0             0             0             0             0 
     BldgType    HouseStyle     RoofStyle      RoofMatl   Exterior1st 
            0             0             0             0             0 
  Exterior2nd    MasVnrType     ExterQual     ExterCond    Foundation 
            0             0             0             0             0 
     BsmtQual      BsmtCond  BsmtExposure  BsmtFinType1  BsmtFinType2 
            0             0             0             0             0 
      Heating     HeatingQC    CentralAir    Electrical   KitchenQual 
            0             0             0             0             0 
   Functional   FireplaceQu    GarageType  GarageFinish    GarageQual 
            0             0             0             0             0 
   GarageCond    PavedDrive      SaleType SaleCondition 
            0             0             0             0 
# Montando um loop do tipo for 
for(item in colnames(train_quali)){
  
  x = data.frame(sort(table(train_quali[item])))
  #print(x)
  mode = tail(x, n = 1)
  #print(mode)
  moda = (mode[1,1])
  moda <- (as.character(moda))
  #print(moda)
  train_quali <- train_quali %>% 
  mutate(across(item, ~ case_when(is.na(.) ~ moda, TRUE ~ .)))
  
  
  
}
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `across(item, ~case_when(is.na(.) ~ moda, TRUE ~ .))`.
Caused by warning:
! Using an external vector in selections was deprecated in tidyselect
  1.1.0.
ℹ Please use `all_of()` or `any_of()` instead.
  # Was:
  data %>% select(item)

  # Now:
  data %>% select(all_of(item))

See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning
was generated.
# testando o resultado
unique(train_quali$Street)
[1] "Pave" "Grvl"
table(train_quali$Street)

Grvl Pave 
   6 1454 
# Somando os valores NA`s para cada coluna para conferir os resultados 
colSums(is.na(train_quali))
     MSZoning        Street      LotShape   LandContour     Utilities 
            0             0             0             0             0 
    LotConfig     LandSlope  Neighborhood    Condition1    Condition2 
            0             0             0             0             0 
     BldgType    HouseStyle     RoofStyle      RoofMatl   Exterior1st 
            0             0             0             0             0 
  Exterior2nd    MasVnrType     ExterQual     ExterCond    Foundation 
            0             0             0             0             0 
     BsmtQual      BsmtCond  BsmtExposure  BsmtFinType1  BsmtFinType2 
            0             0             0             0             0 
      Heating     HeatingQC    CentralAir    Electrical   KitchenQual 
            0             0             0             0             0 
   Functional   FireplaceQu    GarageType  GarageFinish    GarageQual 
            0             0             0             0             0 
   GarageCond    PavedDrive      SaleType SaleCondition 
            0             0             0             0 

4. Limpeza de dados quantitativos

Da mesma maneira que realizamos a limpeza dos dados faltantes no banco de dados qualitativos iniciamos aqui a limpeza de dados faltantes nos dados quantitativos

# Visualizando os dados 
glimpse(train_quanti)
Rows: 1,460
Columns: 38
$ Id            <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
$ MSSubClass    <int> 60, 20, 60, 70, 60, 50, 20, 60, 50, 190, 20, 60,…
$ LotFrontage   <int> 65, 80, 68, 60, 84, 85, 75, NA, 51, 50, 70, 85, …
$ LotArea       <int> 8450, 9600, 11250, 9550, 14260, 14115, 10084, 10…
$ OverallQual   <int> 7, 6, 7, 7, 8, 5, 8, 7, 7, 5, 5, 9, 5, 7, 6, 7, …
$ OverallCond   <int> 5, 8, 5, 5, 5, 5, 5, 6, 5, 6, 5, 5, 6, 5, 5, 8, …
$ YearBuilt     <int> 2003, 1976, 2001, 1915, 2000, 1993, 2004, 1973, …
$ YearRemodAdd  <int> 2003, 1976, 2002, 1970, 2000, 1995, 2005, 1973, …
$ MasVnrArea    <int> 196, 0, 162, 0, 350, 0, 186, 240, 0, 0, 0, 286, …
$ BsmtFinSF1    <int> 706, 978, 486, 216, 655, 732, 1369, 859, 0, 851,…
$ BsmtFinSF2    <int> 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0,…
$ BsmtUnfSF     <int> 150, 284, 434, 540, 490, 64, 317, 216, 952, 140,…
$ TotalBsmtSF   <int> 856, 1262, 920, 756, 1145, 796, 1686, 1107, 952,…
$ X1stFlrSF     <int> 856, 1262, 920, 961, 1145, 796, 1694, 1107, 1022…
$ X2ndFlrSF     <int> 854, 0, 866, 756, 1053, 566, 0, 983, 752, 0, 0, …
$ LowQualFinSF  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ GrLivArea     <int> 1710, 1262, 1786, 1717, 2198, 1362, 1694, 2090, …
$ BsmtFullBath  <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, …
$ BsmtHalfBath  <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ FullBath      <int> 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 3, 1, 2, 1, 1, …
$ HalfBath      <int> 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, …
$ BedroomAbvGr  <int> 3, 3, 3, 3, 4, 1, 3, 3, 2, 2, 3, 4, 2, 3, 2, 2, …
$ KitchenAbvGr  <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, …
$ TotRmsAbvGrd  <int> 8, 6, 6, 7, 9, 5, 7, 7, 8, 5, 5, 11, 4, 7, 5, 5,…
$ Fireplaces    <int> 0, 1, 1, 1, 1, 0, 1, 2, 2, 2, 0, 2, 0, 1, 1, 0, …
$ GarageYrBlt   <int> 2003, 1976, 2001, 1998, 2000, 1993, 2004, 1973, …
$ GarageCars    <int> 2, 2, 2, 3, 3, 2, 2, 2, 2, 1, 1, 3, 1, 3, 1, 2, …
$ GarageArea    <int> 548, 460, 608, 642, 836, 480, 636, 484, 468, 205…
$ WoodDeckSF    <int> 0, 298, 0, 0, 192, 40, 255, 235, 90, 0, 0, 147, …
$ OpenPorchSF   <int> 61, 0, 42, 35, 84, 30, 57, 204, 0, 4, 0, 21, 0, …
$ EnclosedPorch <int> 0, 0, 0, 272, 0, 0, 0, 228, 205, 0, 0, 0, 0, 0, …
$ X3SsnPorch    <int> 0, 0, 0, 0, 0, 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ ScreenPorch   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0…
$ PoolArea      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ MiscVal       <int> 0, 0, 0, 0, 0, 700, 0, 350, 0, 0, 0, 0, 0, 0, 0,…
$ MoSold        <int> 2, 5, 9, 2, 12, 10, 8, 11, 4, 1, 2, 7, 9, 8, 5, …
$ YrSold        <int> 2008, 2007, 2008, 2006, 2008, 2009, 2007, 2009, …
$ SalePrice     <int> 208500, 181500, 223500, 140000, 250000, 143000, …
# Somando os valores faltantes
colSums(is.na(train_quanti))
           Id    MSSubClass   LotFrontage       LotArea   OverallQual 
            0             0           259             0             0 
  OverallCond     YearBuilt  YearRemodAdd    MasVnrArea    BsmtFinSF1 
            0             0             0             8             0 
   BsmtFinSF2     BsmtUnfSF   TotalBsmtSF     X1stFlrSF     X2ndFlrSF 
            0             0             0             0             0 
 LowQualFinSF     GrLivArea  BsmtFullBath  BsmtHalfBath      FullBath 
            0             0             0             0             0 
     HalfBath  BedroomAbvGr  KitchenAbvGr  TotRmsAbvGrd    Fireplaces 
            0             0             0             0             0 
  GarageYrBlt    GarageCars    GarageArea    WoodDeckSF   OpenPorchSF 
           81             0             0             0             0 
EnclosedPorch    X3SsnPorch   ScreenPorch      PoolArea       MiscVal 
            0             0             0             0             0 
       MoSold        YrSold     SalePrice 
            0             0             0 

Com o resultado do somatório dos valores ausentes por variável do banco de dados quantitativos, observamos que apenas 3 variáveis possuem missing values. Sendo assim a melhor opção é utilizar a função mutate() e substituir os NA`s pela média entre as observações para cada variável.

# LotFrontage
train_quanti <- train_quanti %>%    
  mutate(LotFrontage=coalesce(LotFrontage,
                              mean(x =train_quanti$LotFrontage, na.rm = T )))

# MasVnrArea
train_quanti <- train_quanti %>%    
  mutate(MasVnrArea=coalesce(MasVnrArea,
                              mean(x =train_quanti$MasVnrArea, na.rm = T )))

# GarageYrBlt
train_quanti <- train_quanti %>%    
  mutate(GarageYrBlt=coalesce(GarageYrBlt,
                              mean(x =train_quanti$GarageYrBlt, na.rm = T )))

# Conferindo se ainda existe valores faltantes:
colSums(is.na(train_quanti))
           Id    MSSubClass   LotFrontage       LotArea   OverallQual 
            0             0             0             0             0 
  OverallCond     YearBuilt  YearRemodAdd    MasVnrArea    BsmtFinSF1 
            0             0             0             0             0 
   BsmtFinSF2     BsmtUnfSF   TotalBsmtSF     X1stFlrSF     X2ndFlrSF 
            0             0             0             0             0 
 LowQualFinSF     GrLivArea  BsmtFullBath  BsmtHalfBath      FullBath 
            0             0             0             0             0 
     HalfBath  BedroomAbvGr  KitchenAbvGr  TotRmsAbvGrd    Fireplaces 
            0             0             0             0             0 
  GarageYrBlt    GarageCars    GarageArea    WoodDeckSF   OpenPorchSF 
            0             0             0             0             0 
EnclosedPorch    X3SsnPorch   ScreenPorch      PoolArea       MiscVal 
            0             0             0             0             0 
       MoSold        YrSold     SalePrice 
            0             0             0 

5. Unindo os dataframes quali e quanti de treino

Após a limpeza dos dados de forma adequada de acordo com o tipo de dado, onde para os dados categóricos nós utilizamos o valor da moda para substituir os missing values e o valor da media para substituir os dados ausente nos dados quantitativos, podemos com segurança unir esse bancos de dados em apenas um banco de dado já tratado e pronto para a etapa de análise de dados.


# Unindo os bancos de dados

train.clean <- cbind(train_quali,train_quanti)
names(train.clean)
 [1] "MSZoning"      "Street"        "LotShape"      "LandContour"  
 [5] "Utilities"     "LotConfig"     "LandSlope"     "Neighborhood" 
 [9] "Condition1"    "Condition2"    "BldgType"      "HouseStyle"   
[13] "RoofStyle"     "RoofMatl"      "Exterior1st"   "Exterior2nd"  
[17] "MasVnrType"    "ExterQual"     "ExterCond"     "Foundation"   
[21] "BsmtQual"      "BsmtCond"      "BsmtExposure"  "BsmtFinType1" 
[25] "BsmtFinType2"  "Heating"       "HeatingQC"     "CentralAir"   
[29] "Electrical"    "KitchenQual"   "Functional"    "FireplaceQu"  
[33] "GarageType"    "GarageFinish"  "GarageQual"    "GarageCond"   
[37] "PavedDrive"    "SaleType"      "SaleCondition" "Id"           
[41] "MSSubClass"    "LotFrontage"   "LotArea"       "OverallQual"  
[45] "OverallCond"   "YearBuilt"     "YearRemodAdd"  "MasVnrArea"   
[49] "BsmtFinSF1"    "BsmtFinSF2"    "BsmtUnfSF"     "TotalBsmtSF"  
[53] "X1stFlrSF"     "X2ndFlrSF"     "LowQualFinSF"  "GrLivArea"    
[57] "BsmtFullBath"  "BsmtHalfBath"  "FullBath"      "HalfBath"     
[61] "BedroomAbvGr"  "KitchenAbvGr"  "TotRmsAbvGrd"  "Fireplaces"   
[65] "GarageYrBlt"   "GarageCars"    "GarageArea"    "WoodDeckSF"   
[69] "OpenPorchSF"   "EnclosedPorch" "X3SsnPorch"    "ScreenPorch"  
[73] "PoolArea"      "MiscVal"       "MoSold"        "YrSold"       
[77] "SalePrice"    

6. Limpando os dados de teste.

Da mesma forma que procedemos com os dados de treino iremos abordar agora os dados de teste e deixar todos os nossos bancos de dados prontos para a etapa de análise exploratória de dados. A primeira etapa será a separação em dados qualitativos e dados quantitativos, seguido da limpeza dos valores ausentes e por fim a união dos bancos quali e quanti de teste em apenas um dataframe tratado.

# Visualizando os dados de teste
glimpse(test)
Rows: 1,459
Columns: 80
$ Id            <int> 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, …
$ MSSubClass    <int> 20, 20, 60, 60, 120, 60, 20, 60, 20, 20, 120, 16…
$ MSZoning      <chr> "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL", …
$ LotFrontage   <int> 80, 81, 74, 78, 43, 75, NA, 63, 85, 70, 26, 21, …
$ LotArea       <int> 11622, 14267, 13830, 9978, 5005, 10000, 7980, 84…
$ Street        <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", …
$ Alley         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ LotShape      <chr> "Reg", "IR1", "IR1", "IR1", "IR1", "IR1", "IR1",…
$ LandContour   <chr> "Lvl", "Lvl", "Lvl", "Lvl", "HLS", "Lvl", "Lvl",…
$ Utilities     <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub"…
$ LotConfig     <chr> "Inside", "Corner", "Inside", "Inside", "Inside"…
$ LandSlope     <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl",…
$ Neighborhood  <chr> "NAmes", "NAmes", "Gilbert", "Gilbert", "StoneBr…
$ Condition1    <chr> "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm",…
$ Condition2    <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", …
$ BldgType      <chr> "1Fam", "1Fam", "1Fam", "1Fam", "TwnhsE", "1Fam"…
$ HouseStyle    <chr> "1Story", "1Story", "2Story", "2Story", "1Story"…
$ OverallQual   <int> 5, 6, 5, 6, 8, 6, 6, 6, 7, 4, 7, 6, 5, 6, 7, 9, …
$ OverallCond   <int> 6, 6, 5, 6, 5, 5, 7, 5, 5, 5, 5, 5, 5, 6, 6, 5, …
$ YearBuilt     <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, …
$ YearRemodAdd  <int> 1961, 1958, 1998, 1998, 1992, 1994, 2007, 1998, …
$ RoofStyle     <chr> "Gable", "Hip", "Gable", "Gable", "Gable", "Gabl…
$ RoofMatl      <chr> "CompShg", "CompShg", "CompShg", "CompShg", "Com…
$ Exterior1st   <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdB…
$ Exterior2nd   <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdB…
$ MasVnrType    <chr> "None", "BrkFace", "None", "BrkFace", "None", "N…
$ MasVnrArea    <int> 0, 108, 0, 20, 0, 0, 0, 0, 0, 0, 0, 504, 492, 0,…
$ ExterQual     <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "TA", "TA", …
$ ExterCond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "Gd", "TA", …
$ Foundation    <chr> "CBlock", "CBlock", "PConc", "PConc", "PConc", "…
$ BsmtQual      <chr> "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd", "Gd", …
$ BsmtCond      <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ BsmtExposure  <chr> "No", "No", "No", "No", "No", "No", "No", "No", …
$ BsmtFinType1  <chr> "Rec", "ALQ", "GLQ", "GLQ", "ALQ", "Unf", "ALQ",…
$ BsmtFinSF1    <int> 468, 923, 791, 602, 263, 0, 935, 0, 637, 804, 10…
$ BsmtFinType2  <chr> "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf",…
$ BsmtFinSF2    <int> 144, 0, 0, 0, 0, 0, 0, 0, 0, 78, 0, 0, 0, 0, 0, …
$ BsmtUnfSF     <int> 270, 406, 137, 324, 1017, 763, 233, 789, 663, 0,…
$ TotalBsmtSF   <int> 882, 1329, 928, 926, 1280, 763, 1168, 789, 1300,…
$ Heating       <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", …
$ HeatingQC     <chr> "TA", "TA", "Gd", "Ex", "Ex", "Gd", "Ex", "Gd", …
$ CentralAir    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ Electrical    <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SB…
$ X1stFlrSF     <int> 896, 1329, 928, 926, 1280, 763, 1187, 789, 1341,…
$ X2ndFlrSF     <int> 0, 0, 701, 678, 0, 892, 0, 676, 0, 0, 0, 504, 56…
$ LowQualFinSF  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ GrLivArea     <int> 896, 1329, 1629, 1604, 1280, 1655, 1187, 1465, 1…
$ BsmtFullBath  <int> 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, …
$ BsmtHalfBath  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ FullBath      <int> 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, …
$ HalfBath      <int> 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, …
$ BedroomAbvGr  <int> 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 2, 3, …
$ KitchenAbvGr  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ KitchenQual   <chr> "TA", "Gd", "TA", "Gd", "Gd", "TA", "TA", "TA", …
$ TotRmsAbvGrd  <int> 5, 6, 6, 7, 5, 7, 6, 7, 5, 4, 5, 5, 6, 6, 4, 10,…
$ Functional    <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ",…
$ Fireplaces    <int> 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, …
$ FireplaceQu   <chr> NA, NA, "TA", "Gd", NA, "TA", NA, "Gd", "Po", NA…
$ GarageType    <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd"…
$ GarageYrBlt   <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, …
$ GarageFinish  <chr> "Unf", "Unf", "Fin", "Fin", "RFn", "Fin", "Fin",…
$ GarageCars    <int> 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 3, …
$ GarageArea    <int> 730, 312, 482, 470, 506, 440, 420, 393, 506, 525…
$ GarageQual    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ GarageCond    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ PavedDrive    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ WoodDeckSF    <int> 140, 393, 212, 360, 0, 157, 483, 0, 192, 240, 20…
$ OpenPorchSF   <int> 0, 36, 34, 36, 82, 84, 21, 75, 0, 0, 68, 0, 0, 0…
$ EnclosedPorch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ X3SsnPorch    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ ScreenPorch   <int> 120, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ PoolArea      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ PoolQC        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Fence         <chr> "MnPrv", NA, "MnPrv", NA, NA, NA, "GdPrv", NA, N…
$ MiscFeature   <chr> NA, "Gar2", NA, NA, NA, NA, "Shed", NA, NA, NA, …
$ MiscVal       <int> 0, 12500, 0, 0, 0, 0, 500, 0, 0, 0, 0, 0, 0, 0, …
$ MoSold        <int> 6, 6, 3, 6, 1, 4, 3, 5, 2, 4, 6, 2, 3, 6, 6, 1, …
$ YrSold        <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, …
$ SaleType      <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", …
$ SaleCondition <chr> "Normal", "Normal", "Normal", "Normal", "Normal"…
# separando os dados de teste em quanti e quali

test_quanti <- select_if(test, is.numeric)

test_quali <- select_if(test, is.character)

# Visualizando o resultado

glimpse(test_quali)
Rows: 1,459
Columns: 43
$ MSZoning      <chr> "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL", …
$ Street        <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", …
$ Alley         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ LotShape      <chr> "Reg", "IR1", "IR1", "IR1", "IR1", "IR1", "IR1",…
$ LandContour   <chr> "Lvl", "Lvl", "Lvl", "Lvl", "HLS", "Lvl", "Lvl",…
$ Utilities     <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub"…
$ LotConfig     <chr> "Inside", "Corner", "Inside", "Inside", "Inside"…
$ LandSlope     <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl",…
$ Neighborhood  <chr> "NAmes", "NAmes", "Gilbert", "Gilbert", "StoneBr…
$ Condition1    <chr> "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm",…
$ Condition2    <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", …
$ BldgType      <chr> "1Fam", "1Fam", "1Fam", "1Fam", "TwnhsE", "1Fam"…
$ HouseStyle    <chr> "1Story", "1Story", "2Story", "2Story", "1Story"…
$ RoofStyle     <chr> "Gable", "Hip", "Gable", "Gable", "Gable", "Gabl…
$ RoofMatl      <chr> "CompShg", "CompShg", "CompShg", "CompShg", "Com…
$ Exterior1st   <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdB…
$ Exterior2nd   <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdB…
$ MasVnrType    <chr> "None", "BrkFace", "None", "BrkFace", "None", "N…
$ ExterQual     <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "TA", "TA", …
$ ExterCond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "Gd", "TA", …
$ Foundation    <chr> "CBlock", "CBlock", "PConc", "PConc", "PConc", "…
$ BsmtQual      <chr> "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd", "Gd", …
$ BsmtCond      <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ BsmtExposure  <chr> "No", "No", "No", "No", "No", "No", "No", "No", …
$ BsmtFinType1  <chr> "Rec", "ALQ", "GLQ", "GLQ", "ALQ", "Unf", "ALQ",…
$ BsmtFinType2  <chr> "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf",…
$ Heating       <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", …
$ HeatingQC     <chr> "TA", "TA", "Gd", "Ex", "Ex", "Gd", "Ex", "Gd", …
$ CentralAir    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ Electrical    <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SB…
$ KitchenQual   <chr> "TA", "Gd", "TA", "Gd", "Gd", "TA", "TA", "TA", …
$ Functional    <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ",…
$ FireplaceQu   <chr> NA, NA, "TA", "Gd", NA, "TA", NA, "Gd", "Po", NA…
$ GarageType    <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd"…
$ GarageFinish  <chr> "Unf", "Unf", "Fin", "Fin", "RFn", "Fin", "Fin",…
$ GarageQual    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ GarageCond    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ PavedDrive    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ PoolQC        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Fence         <chr> "MnPrv", NA, "MnPrv", NA, NA, NA, "GdPrv", NA, N…
$ MiscFeature   <chr> NA, "Gar2", NA, NA, NA, NA, "Shed", NA, NA, NA, …
$ SaleType      <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", …
$ SaleCondition <chr> "Normal", "Normal", "Normal", "Normal", "Normal"…
glimpse(test_quanti)
Rows: 1,459
Columns: 37
$ Id            <int> 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, …
$ MSSubClass    <int> 20, 20, 60, 60, 120, 60, 20, 60, 20, 20, 120, 16…
$ LotFrontage   <int> 80, 81, 74, 78, 43, 75, NA, 63, 85, 70, 26, 21, …
$ LotArea       <int> 11622, 14267, 13830, 9978, 5005, 10000, 7980, 84…
$ OverallQual   <int> 5, 6, 5, 6, 8, 6, 6, 6, 7, 4, 7, 6, 5, 6, 7, 9, …
$ OverallCond   <int> 6, 6, 5, 6, 5, 5, 7, 5, 5, 5, 5, 5, 5, 6, 6, 5, …
$ YearBuilt     <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, …
$ YearRemodAdd  <int> 1961, 1958, 1998, 1998, 1992, 1994, 2007, 1998, …
$ MasVnrArea    <int> 0, 108, 0, 20, 0, 0, 0, 0, 0, 0, 0, 504, 492, 0,…
$ BsmtFinSF1    <int> 468, 923, 791, 602, 263, 0, 935, 0, 637, 804, 10…
$ BsmtFinSF2    <int> 144, 0, 0, 0, 0, 0, 0, 0, 0, 78, 0, 0, 0, 0, 0, …
$ BsmtUnfSF     <int> 270, 406, 137, 324, 1017, 763, 233, 789, 663, 0,…
$ TotalBsmtSF   <int> 882, 1329, 928, 926, 1280, 763, 1168, 789, 1300,…
$ X1stFlrSF     <int> 896, 1329, 928, 926, 1280, 763, 1187, 789, 1341,…
$ X2ndFlrSF     <int> 0, 0, 701, 678, 0, 892, 0, 676, 0, 0, 0, 504, 56…
$ LowQualFinSF  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ GrLivArea     <int> 896, 1329, 1629, 1604, 1280, 1655, 1187, 1465, 1…
$ BsmtFullBath  <int> 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, …
$ BsmtHalfBath  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ FullBath      <int> 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, …
$ HalfBath      <int> 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, …
$ BedroomAbvGr  <int> 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 2, 3, …
$ KitchenAbvGr  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ TotRmsAbvGrd  <int> 5, 6, 6, 7, 5, 7, 6, 7, 5, 4, 5, 5, 6, 6, 4, 10,…
$ Fireplaces    <int> 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, …
$ GarageYrBlt   <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, …
$ GarageCars    <int> 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 3, …
$ GarageArea    <int> 730, 312, 482, 470, 506, 440, 420, 393, 506, 525…
$ WoodDeckSF    <int> 140, 393, 212, 360, 0, 157, 483, 0, 192, 240, 20…
$ OpenPorchSF   <int> 0, 36, 34, 36, 82, 84, 21, 75, 0, 0, 68, 0, 0, 0…
$ EnclosedPorch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ X3SsnPorch    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ ScreenPorch   <int> 120, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ PoolArea      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ MiscVal       <int> 0, 12500, 0, 0, 0, 0, 500, 0, 0, 0, 0, 0, 0, 0, …
$ MoSold        <int> 6, 6, 3, 6, 1, 4, 3, 5, 2, 4, 6, 2, 3, 6, 6, 1, …
$ YrSold        <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, …
# Tratando os dados qualitativos

colSums(is.na(test_quali))
     MSZoning        Street         Alley      LotShape   LandContour 
            4             0          1352             0             0 
    Utilities     LotConfig     LandSlope  Neighborhood    Condition1 
            2             0             0             0             0 
   Condition2      BldgType    HouseStyle     RoofStyle      RoofMatl 
            0             0             0             0             0 
  Exterior1st   Exterior2nd    MasVnrType     ExterQual     ExterCond 
            1             1            16             0             0 
   Foundation      BsmtQual      BsmtCond  BsmtExposure  BsmtFinType1 
            0            44            45            44            42 
 BsmtFinType2       Heating     HeatingQC    CentralAir    Electrical 
           42             0             0             0             0 
  KitchenQual    Functional   FireplaceQu    GarageType  GarageFinish 
            1             2           730            76            78 
   GarageQual    GarageCond    PavedDrive        PoolQC         Fence 
           78            78             0          1456          1169 
  MiscFeature      SaleType SaleCondition 
         1408             1             0 
# Observe que temos os mesmos problemas, existem variável que são praticamente nulas, sendo
# a melhor escolha deletar elas do nosso banco de dados. 

test_quali <- test_quali %>% select(-c(Alley, PoolQC, Fence, MiscFeature))
glimpse(test_quali)
Rows: 1,459
Columns: 39
$ MSZoning      <chr> "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL", …
$ Street        <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", …
$ LotShape      <chr> "Reg", "IR1", "IR1", "IR1", "IR1", "IR1", "IR1",…
$ LandContour   <chr> "Lvl", "Lvl", "Lvl", "Lvl", "HLS", "Lvl", "Lvl",…
$ Utilities     <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub"…
$ LotConfig     <chr> "Inside", "Corner", "Inside", "Inside", "Inside"…
$ LandSlope     <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl",…
$ Neighborhood  <chr> "NAmes", "NAmes", "Gilbert", "Gilbert", "StoneBr…
$ Condition1    <chr> "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm",…
$ Condition2    <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", …
$ BldgType      <chr> "1Fam", "1Fam", "1Fam", "1Fam", "TwnhsE", "1Fam"…
$ HouseStyle    <chr> "1Story", "1Story", "2Story", "2Story", "1Story"…
$ RoofStyle     <chr> "Gable", "Hip", "Gable", "Gable", "Gable", "Gabl…
$ RoofMatl      <chr> "CompShg", "CompShg", "CompShg", "CompShg", "Com…
$ Exterior1st   <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdB…
$ Exterior2nd   <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdB…
$ MasVnrType    <chr> "None", "BrkFace", "None", "BrkFace", "None", "N…
$ ExterQual     <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "TA", "TA", …
$ ExterCond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "Gd", "TA", …
$ Foundation    <chr> "CBlock", "CBlock", "PConc", "PConc", "PConc", "…
$ BsmtQual      <chr> "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd", "Gd", …
$ BsmtCond      <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ BsmtExposure  <chr> "No", "No", "No", "No", "No", "No", "No", "No", …
$ BsmtFinType1  <chr> "Rec", "ALQ", "GLQ", "GLQ", "ALQ", "Unf", "ALQ",…
$ BsmtFinType2  <chr> "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf",…
$ Heating       <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", …
$ HeatingQC     <chr> "TA", "TA", "Gd", "Ex", "Ex", "Gd", "Ex", "Gd", …
$ CentralAir    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ Electrical    <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SB…
$ KitchenQual   <chr> "TA", "Gd", "TA", "Gd", "Gd", "TA", "TA", "TA", …
$ Functional    <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ",…
$ FireplaceQu   <chr> NA, NA, "TA", "Gd", NA, "TA", NA, "Gd", "Po", NA…
$ GarageType    <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd"…
$ GarageFinish  <chr> "Unf", "Unf", "Fin", "Fin", "RFn", "Fin", "Fin",…
$ GarageQual    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ GarageCond    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ PavedDrive    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ SaleType      <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", …
$ SaleCondition <chr> "Normal", "Normal", "Normal", "Normal", "Normal"…
# Aplicando o mesmo loop for para tratar os dado de teste qualitativos.

for(item in colnames(test_quali)){
  #print(item) 
  x = data.frame(sort(table(test_quali[item])))
  #print(x)
  mode = tail(x, n = 1)
  #print(mode)
  moda = (mode[1,1])
  moda <- as.character(moda)
  #print(train_quali[item])
  test_quali <- test_quali %>% 
  mutate(across(item, ~ case_when(is.na(.) ~ moda, TRUE ~ .)))
}

# Conferindo a soma dos valores nulos nas colunas após a transformção 
unique(test_quali$MasVnrType)
[1] "None"    "BrkFace" "Stone"   "BrkCmn" 
table(test_quali$MasVnrType)

 BrkCmn BrkFace    None   Stone 
     10     434     894     121 
colSums(is.na(test_quali))
     MSZoning        Street      LotShape   LandContour     Utilities 
            0             0             0             0             0 
    LotConfig     LandSlope  Neighborhood    Condition1    Condition2 
            0             0             0             0             0 
     BldgType    HouseStyle     RoofStyle      RoofMatl   Exterior1st 
            0             0             0             0             0 
  Exterior2nd    MasVnrType     ExterQual     ExterCond    Foundation 
            0             0             0             0             0 
     BsmtQual      BsmtCond  BsmtExposure  BsmtFinType1  BsmtFinType2 
            0             0             0             0             0 
      Heating     HeatingQC    CentralAir    Electrical   KitchenQual 
            0             0             0             0             0 
   Functional   FireplaceQu    GarageType  GarageFinish    GarageQual 
            0             0             0             0             0 
   GarageCond    PavedDrive      SaleType SaleCondition 
            0             0             0             0 
#############################

# Tratando os dados quantitativos 

# visuaizando os dados
glimpse(test_quanti)
Rows: 1,459
Columns: 37
$ Id            <int> 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, …
$ MSSubClass    <int> 20, 20, 60, 60, 120, 60, 20, 60, 20, 20, 120, 16…
$ LotFrontage   <int> 80, 81, 74, 78, 43, 75, NA, 63, 85, 70, 26, 21, …
$ LotArea       <int> 11622, 14267, 13830, 9978, 5005, 10000, 7980, 84…
$ OverallQual   <int> 5, 6, 5, 6, 8, 6, 6, 6, 7, 4, 7, 6, 5, 6, 7, 9, …
$ OverallCond   <int> 6, 6, 5, 6, 5, 5, 7, 5, 5, 5, 5, 5, 5, 6, 6, 5, …
$ YearBuilt     <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, …
$ YearRemodAdd  <int> 1961, 1958, 1998, 1998, 1992, 1994, 2007, 1998, …
$ MasVnrArea    <int> 0, 108, 0, 20, 0, 0, 0, 0, 0, 0, 0, 504, 492, 0,…
$ BsmtFinSF1    <int> 468, 923, 791, 602, 263, 0, 935, 0, 637, 804, 10…
$ BsmtFinSF2    <int> 144, 0, 0, 0, 0, 0, 0, 0, 0, 78, 0, 0, 0, 0, 0, …
$ BsmtUnfSF     <int> 270, 406, 137, 324, 1017, 763, 233, 789, 663, 0,…
$ TotalBsmtSF   <int> 882, 1329, 928, 926, 1280, 763, 1168, 789, 1300,…
$ X1stFlrSF     <int> 896, 1329, 928, 926, 1280, 763, 1187, 789, 1341,…
$ X2ndFlrSF     <int> 0, 0, 701, 678, 0, 892, 0, 676, 0, 0, 0, 504, 56…
$ LowQualFinSF  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ GrLivArea     <int> 896, 1329, 1629, 1604, 1280, 1655, 1187, 1465, 1…
$ BsmtFullBath  <int> 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, …
$ BsmtHalfBath  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ FullBath      <int> 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, …
$ HalfBath      <int> 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, …
$ BedroomAbvGr  <int> 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 2, 3, …
$ KitchenAbvGr  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ TotRmsAbvGrd  <int> 5, 6, 6, 7, 5, 7, 6, 7, 5, 4, 5, 5, 6, 6, 4, 10,…
$ Fireplaces    <int> 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, …
$ GarageYrBlt   <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, …
$ GarageCars    <int> 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 3, …
$ GarageArea    <int> 730, 312, 482, 470, 506, 440, 420, 393, 506, 525…
$ WoodDeckSF    <int> 140, 393, 212, 360, 0, 157, 483, 0, 192, 240, 20…
$ OpenPorchSF   <int> 0, 36, 34, 36, 82, 84, 21, 75, 0, 0, 68, 0, 0, 0…
$ EnclosedPorch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ X3SsnPorch    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ ScreenPorch   <int> 120, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ PoolArea      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ MiscVal       <int> 0, 12500, 0, 0, 0, 0, 500, 0, 0, 0, 0, 0, 0, 0, …
$ MoSold        <int> 6, 6, 3, 6, 1, 4, 3, 5, 2, 4, 6, 2, 3, 6, 6, 1, …
$ YrSold        <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, …
# Conferindo os valores ausentes

colSums(is.na(test_quanti))
           Id    MSSubClass   LotFrontage       LotArea   OverallQual 
            0             0           227             0             0 
  OverallCond     YearBuilt  YearRemodAdd    MasVnrArea    BsmtFinSF1 
            0             0             0            15             1 
   BsmtFinSF2     BsmtUnfSF   TotalBsmtSF     X1stFlrSF     X2ndFlrSF 
            1             1             1             0             0 
 LowQualFinSF     GrLivArea  BsmtFullBath  BsmtHalfBath      FullBath 
            0             0             2             2             0 
     HalfBath  BedroomAbvGr  KitchenAbvGr  TotRmsAbvGrd    Fireplaces 
            0             0             0             0             0 
  GarageYrBlt    GarageCars    GarageArea    WoodDeckSF   OpenPorchSF 
           78             1             1             0             0 
EnclosedPorch    X3SsnPorch   ScreenPorch      PoolArea       MiscVal 
            0             0             0             0             0 
       MoSold        YrSold 
            0             0 
# Ao todo temos 8 variaveis que apresentam valores ausentes.
test_quanti <- test_quanti %>%    
  mutate(LotFrontage=coalesce(LotFrontage,
                              mean(x =test_quanti$LotFrontage, na.rm = T )))
test_quanti <- test_quanti %>%    
  mutate(MasVnrArea=coalesce(MasVnrArea,
                              mean(x =test_quanti$MasVnrArea, na.rm = T )))
test_quanti <- test_quanti %>%    
  mutate(BsmtFinSF1 =coalesce(BsmtFinSF1 ,
                              mean(x =test_quanti$BsmtFinSF1 , na.rm = T )))

test_quanti <- test_quanti %>%    
  mutate(BsmtFinSF2 =coalesce(BsmtFinSF2 ,
                              mean(x =test_quanti$BsmtFinSF2 , na.rm = T )))

test_quanti <- test_quanti %>%    
  mutate(BsmtUnfSF =coalesce(BsmtUnfSF ,
                              mean(x =test_quanti$BsmtUnfSF , na.rm = T )))


test_quanti <- test_quanti %>%    
  mutate(TotalBsmtSF =coalesce(TotalBsmtSF ,
                              mean(x =test_quanti$TotalBsmtSF , na.rm = T )))

test_quanti <- test_quanti %>%    
  mutate(BsmtFullBath =coalesce(BsmtFullBath ,
                              mean(x =test_quanti$BsmtFullBath , na.rm = T )))

test_quanti <- test_quanti %>%    
  mutate(BsmtHalfBath =coalesce(BsmtHalfBath ,
                              mean(x =test_quanti$BsmtHalfBath , na.rm = T )))


test_quanti <- test_quanti %>%    
  mutate(GarageYrBlt =coalesce(GarageYrBlt ,
                              mean(x =test_quanti$GarageYrBlt , na.rm = T )))


test_quanti <- test_quanti %>%    
  mutate(GarageCars =coalesce(GarageCars ,
                              mean(x =test_quanti$GarageCars , na.rm = T )))


test_quanti <- test_quanti %>%    
  mutate(GarageArea =coalesce(GarageArea ,
                              mean(x =test_quanti$GarageArea , na.rm = T )))

colSums(is.na(test_quanti))
           Id    MSSubClass   LotFrontage       LotArea   OverallQual 
            0             0             0             0             0 
  OverallCond     YearBuilt  YearRemodAdd    MasVnrArea    BsmtFinSF1 
            0             0             0             0             0 
   BsmtFinSF2     BsmtUnfSF   TotalBsmtSF     X1stFlrSF     X2ndFlrSF 
            0             0             0             0             0 
 LowQualFinSF     GrLivArea  BsmtFullBath  BsmtHalfBath      FullBath 
            0             0             0             0             0 
     HalfBath  BedroomAbvGr  KitchenAbvGr  TotRmsAbvGrd    Fireplaces 
            0             0             0             0             0 
  GarageYrBlt    GarageCars    GarageArea    WoodDeckSF   OpenPorchSF 
            0             0             0             0             0 
EnclosedPorch    X3SsnPorch   ScreenPorch      PoolArea       MiscVal 
            0             0             0             0             0 
       MoSold        YrSold 
            0             0 
# Juntando os dataframes

test.clean <- cbind(test_quali,test_quanti)

Por fim, cabe agora conferir os dados de treino e teste tratados


colSums(is.na(train.clean))
     MSZoning        Street      LotShape   LandContour     Utilities 
            0             0             0             0             0 
    LotConfig     LandSlope  Neighborhood    Condition1    Condition2 
            0             0             0             0             0 
     BldgType    HouseStyle     RoofStyle      RoofMatl   Exterior1st 
            0             0             0             0             0 
  Exterior2nd    MasVnrType     ExterQual     ExterCond    Foundation 
            0             0             0             0             0 
     BsmtQual      BsmtCond  BsmtExposure  BsmtFinType1  BsmtFinType2 
            0             0             0             0             0 
      Heating     HeatingQC    CentralAir    Electrical   KitchenQual 
            0             0             0             0             0 
   Functional   FireplaceQu    GarageType  GarageFinish    GarageQual 
            0             0             0             0             0 
   GarageCond    PavedDrive      SaleType SaleCondition            Id 
            0             0             0             0             0 
   MSSubClass   LotFrontage       LotArea   OverallQual   OverallCond 
            0             0             0             0             0 
    YearBuilt  YearRemodAdd    MasVnrArea    BsmtFinSF1    BsmtFinSF2 
            0             0             0             0             0 
    BsmtUnfSF   TotalBsmtSF     X1stFlrSF     X2ndFlrSF  LowQualFinSF 
            0             0             0             0             0 
    GrLivArea  BsmtFullBath  BsmtHalfBath      FullBath      HalfBath 
            0             0             0             0             0 
 BedroomAbvGr  KitchenAbvGr  TotRmsAbvGrd    Fireplaces   GarageYrBlt 
            0             0             0             0             0 
   GarageCars    GarageArea    WoodDeckSF   OpenPorchSF EnclosedPorch 
            0             0             0             0             0 
   X3SsnPorch   ScreenPorch      PoolArea       MiscVal        MoSold 
            0             0             0             0             0 
       YrSold     SalePrice 
            0             0 
colSums(is.na(test.clean))
     MSZoning        Street      LotShape   LandContour     Utilities 
            0             0             0             0             0 
    LotConfig     LandSlope  Neighborhood    Condition1    Condition2 
            0             0             0             0             0 
     BldgType    HouseStyle     RoofStyle      RoofMatl   Exterior1st 
            0             0             0             0             0 
  Exterior2nd    MasVnrType     ExterQual     ExterCond    Foundation 
            0             0             0             0             0 
     BsmtQual      BsmtCond  BsmtExposure  BsmtFinType1  BsmtFinType2 
            0             0             0             0             0 
      Heating     HeatingQC    CentralAir    Electrical   KitchenQual 
            0             0             0             0             0 
   Functional   FireplaceQu    GarageType  GarageFinish    GarageQual 
            0             0             0             0             0 
   GarageCond    PavedDrive      SaleType SaleCondition            Id 
            0             0             0             0             0 
   MSSubClass   LotFrontage       LotArea   OverallQual   OverallCond 
            0             0             0             0             0 
    YearBuilt  YearRemodAdd    MasVnrArea    BsmtFinSF1    BsmtFinSF2 
            0             0             0             0             0 
    BsmtUnfSF   TotalBsmtSF     X1stFlrSF     X2ndFlrSF  LowQualFinSF 
            0             0             0             0             0 
    GrLivArea  BsmtFullBath  BsmtHalfBath      FullBath      HalfBath 
            0             0             0             0             0 
 BedroomAbvGr  KitchenAbvGr  TotRmsAbvGrd    Fireplaces   GarageYrBlt 
            0             0             0             0             0 
   GarageCars    GarageArea    WoodDeckSF   OpenPorchSF EnclosedPorch 
            0             0             0             0             0 
   X3SsnPorch   ScreenPorch      PoolArea       MiscVal        MoSold 
            0             0             0             0             0 
       YrSold 
            0 

7. Análise Exploratória de Dados.

Nesta etapa iniciaremos uma analise descritiva tanto das variáveis qualitativas como quantitativas do banco de dados de treino. Para os dados quantitativos abordaremos em primeira instancia uma investigação por estatistica decritiva univariada e depois bivariada para entender a sua relação com a variavel Y do problema que é a SalesPrice.

7.1 Estatistica Descritiva para dados Qualitativos.

A estatistica univariada para contempla tabelas de ferequencia de ocorrencia o que já vimos anteriormente ao obtermos a moda, representação gráfica da distribuição e medidas de localização, dispersão ou variabilidade e medidas de forma(assimetria e curtosis).

A tabela de distribuição de frequências é calculada para cada valor discreto da variável. A frequência pode ser absoluta que informa o número de ocorrências de cada elemento i na amostra, pode ser uma frequência relativa que fornece a porcentagem % relativa à frequência absoluta, pode ser uma frequência acumulada que representa a soma de todos os elementos e por fim pode ser uma frequência relativa acumulada que é a frequencia relativa à acumulada.

# Barras empilhadas
ggplotly(
tabela.freq.MSZoning %>% 
  ggplot(aes(x = "", y = Freq, fill = MSZoning, label = Freq))+
  geom_bar(stat = "identity", width = 0.3) + coord_flip() + 
  geom_label(position = position_stack(vjust = 0.5))+
  theme_void()
)
Warning: geom_GeomLabel() has yet to be implemented in plotly.
  If you'd like to see this geom implemented,
  Please open an issue with your example code at
  https://github.com/ropensci/plotly/issuesWarning: geom_GeomLabel() has yet to be implemented in plotly.
  If you'd like to see this geom implemented,
  Please open an issue with your example code at
  https://github.com/ropensci/plotly/issuesWarning: geom_GeomLabel() has yet to be implemented in plotly.
  If you'd like to see this geom implemented,
  Please open an issue with your example code at
  https://github.com/ropensci/plotly/issuesWarning: geom_GeomLabel() has yet to be implemented in plotly.
  If you'd like to see this geom implemented,
  Please open an issue with your example code at
  https://github.com/ropensci/plotly/issuesWarning: geom_GeomLabel() has yet to be implemented in plotly.
  If you'd like to see this geom implemented,
  Please open an issue with your example code at
  https://github.com/ropensci/plotly/issues

Desta maneira criamos uma forma eficiente de exploramos o banco de dados para as variáveis categóricas. Como temos 39 variáveis exploratórias o melhor a fazer é criar um função que nos retorne essas três informações, a tabela de frequência, o grafico de barras verticais e grafico de barras empilhadas.

Então vamos colocar a mão na massa!!

resultado_HouseStyle
[[1]]

[[2]]

[[3]]
NA
LS0tDQp0aXRsZTogIkhvdXNlIHByaWNlcyAtIEthZ2dsZSBjaGFsbGVuZ2UiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIDEuT2JqZXRpdm8gZG8gZGVzYWZpbw0KDQogIE8gb2JqZXRpdm8gZG8gZGVzYWZpbyBIb3VzZSBwcmljZXMgw6kgcHJldmVyIG8gdmFsb3IgZGUgdmVuZGEgZGUgdW0gaW3Ds3ZlbCBkZSBhY29yZG8gY29tIGFzIDc5IHZhcmnDoXZlaXMgZG8gYmFuY28gZGUgZGFkb3MuIE8gZGVzYWZpbyBkZXNwb25pYmlsaXphIDQgYXJxdWl2b3MsIHVtIGRhdGFzZXQgZGUgdHJlaW5vIGUgdW0gZGF0YXNldCBkZSB0ZXN0ZSBhbWJvcyBlbSBjc3YsIHVtIGFycXVpdm8gdHh0IGNvbSBhIGRlc2NyacOnw6NvIGRhcyBjb2x1bmFzIGRvIGRhdGFzZXQgdHJlaW5vIGUgdW0gYXJxdWl2byBjc3YgZXhlbXBsbyBkZSBjb21vIG8gcmVzdWx0YWRvIGRldmUgc2VyIHN1Ym1ldGlkby4NCiAgUGFyYSBhY2Vzc2FyIG1haXMgaW5mb3JtYcOnw7VlcyBzb2JyZSBvIGRlc2FmaW8gZSBmYXplciBkb3dubG9hZCBkb3MgZGFkb3MgYmFzdGEgYWNlc3NhciBvIFtsaW5rXShodHRwczovL3d3dy5rYWdnbGUuY29tL2NvbXBldGl0aW9ucy9ob3VzZS1wcmljZXMtYWR2YW5jZWQtcmVncmVzc2lvbi10ZWNobmlxdWVzL292ZXJ2aWV3KS4NCiAgDQojIDIuIExlbmRvIG9zIGFycXVpdm9zLg0KDQpgYGB7cn0NCiMgQ2FycmVnYW5kbyBvIHRpZHl2ZXJzZQ0KDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCg0KIyBTZXRhbmRvIG8gZGlyZXTDs3JpbyBkZSB0cmFiYWxobw0Kc2V0d2QoZGlyID0gIkM6L1VzZXJzL3JvZG9sZm8ucGF1bGEvRGVza3RvcC9QRVNTT0FMX1JPRE9MRk8vQmV0dGVyIERlY2lzaW9ucy9zY3JpcHRzL2thZ2dsZS9ob3VzZV9wcmljZXMiKQ0KDQojIExlbmRvIG9zIGFycXVpdm9zIGRlIHRyZWlubyBlIGRlIHRlc3RlDQp0cmFpbiA9IGRhdGEuZnJhbWUocmVhZC5jc3YoInRyYWluLmNzdiIpKQ0KdGVzdCA9IGRhdGEuZnJhbWUocmVhZC5jc3YoInRlc3QuY3N2IikpDQoNCiMgVmlzdWFsaXphbmRvIGFzIGRpbWVuc8O1ZXMgZG9zIGFycXVpdm9zDQpkaW0odHJhaW4pDQpkaW0odGVzdCkNCg0KDQojIFZpc3VhbGl6YW5kbyBvIGRhdGFzZXQgZGUgdHJlaW5vIA0KZ2xpbXBzZSh0cmFpbikNCg0KIyBPYnRlbmRvIHVtIHN1bcOhcmlvIGRvcyBkYWRvcyBkZSB0cmVpbm8NCg0Kc3VtbWFyeSh0cmFpbikNCg0KYGBgDQpVdGlsaXphbmRvIG8gc3Vtw6FyaW8gZmljYSBjbGFybyBxdWUgbyBiYW5jbyBkZSBkYWRvcyBkZSB0cmVpbm8gcG9zc3VpIG11aXRhcyB2YXJpw6F2ZWlzLiBWYW1vcyBpbmNpYWxtZW50ZSBzZXBhcmFyIG8gYmFuY28gZGUgZGFkb3MgZW0gdmFyacOhdmVpcyBxdWFsaXRhdGl2YXMgZSB2YXJpw6F2ZWlzIHF1YW50aXRhdGl2YXMsIGFzc2ltIHNlIHRvcm5hIG1haXMgZmFjaWwgbyB0cmF0YW1lbnRvIGUgbGltcGV6YSBkb3MgZGFkb3MuDQoNClBhcmEgYXMgdmFyacOhdmVpcyBxdWFsaXRhdGl2YXMgcXVlIGFwcmVzZW50ZW0gdmFsb3JlcyBmYWx0YW50ZXMgb3UgTkFgcyB2YW1vcyBzdWJzdGl0dWlyIHBlbGEgbW9kYSBkYXF1ZWxhIHZhcmnDoXZlbCBlIHBhcmEgYXMgdmFyacOhdmVpcyBxdWFudGl0YXRpdmFzIGRlIHF1ZSBhcHJlc2VudGVtIHZhbG9yZXMgZmFsdGFudGVzIHZhbW9zIHN1YnN0aXR1aXIgcGVsYSBtw6lkaWEuIA0KDQpBIGNvbHVuYSBJZCBzZXLDoSBlbGltaW5hZGEgcG9yIHNlciB1bSB2YWxvciBzZXF1ZW5jaWFsIHF1ZSBuYW8gcmVwcmVzZW50YSBuZWh1bWEgaW5mb3JtYcOnw6NvIMO6dGlsIGNvbW8gdmFyacOhdmVsIGV4cGxpY2F0aXZhIGNhcGF6IGRlIGluZmx1ZW5jaWFyIG5vIHZhbG9yIHByZWRpdG8gZG8gcHJlw6dvIGRlIHZlbmRhIGRlIHVtYSBjYXNhLg0KDQpQYXJhIHNlcGFyYXIgbyBkYXRhc2V0IGVtIGRhZG9zIHF1YW50aXRhdGl2b3MgZSBxdWFsaXRhdGl2b3MgbyBtZWxob3IgY2FtaW5obyDDqSB1c2FyYSBhIGZ1bsOnw6NvIFtzZWxlY3RfaWYoKV0oaHR0cHM6Ly93d3cucmRvY3VtZW50YXRpb24ub3JnL3BhY2thZ2VzL2RwbHlyL3ZlcnNpb25zLzAuNS4wL3RvcGljcy9zZWxlY3RfaWYpIGRvIHBhY290ZSBkcGx5cg0KDQoNCmBgYHtyfQ0KDQojIFNlcGFyYW5kbyBvIGRhdGFmcmFtZSBlbSB0cmFpbiBxdWFudGkgZSB0cmFpbiBxdWFsaSB1c2FuZG8gYSBmdW7Dp8OjbyBzZWxlY3RfaWYNCg0KdHJhaW5fcXVhbnRpIDwtIHNlbGVjdF9pZih0cmFpbiwgaXMubnVtZXJpYykNCnRyYWluX3F1YWxpIDwtIHNlbGVjdF9pZih0cmFpbiwgaXMuY2hhcmFjdGVyKQ0KDQoNCmBgYA0KIyAzLiBMaW1wZXNhIGRlIGRhZG9zIFF1YWxpdGF0aXZvcw0KDQpgYGB7cn0NCiNWaXN1YWxpemFuZG8gb3MgZGFkb3MgDQpnbGltcHNlKHRyYWluX3F1YWxpKQ0KDQojIFByb2N1cmFuZG8gVmFsb3JlcyBtaXNzaW5nDQpjb2xTdW1zKGlzLm5hKHRyYWluX3F1YWxpKSkNCg0KYGBgDQoNCg0KQ29tIGVzc2EgYW5hbGlzZSBhIGNpbWEgc2FiZW1vcyBvIHNlZ3VpbnRlIHNvYnJlIG9zIG5vc3NvcyBkYWRvcyBjYXRlZ8Ozcmljb3M6DQoNCi0gVGVtb3MgMTQ2MCBvYnNlcnZhw6fDtWVzIGUgNDMgdmFyacOhdmVpcw0KLSBEYXMgNDMgdmFyacOhdmVpcyAxNSBhcHJlc2VudGFtIHZhbG9yZXMgZmFsdGFudGVzIChOQSkNCi0gRGFzIDE1IHZhcmnDoXZlaXMgY29tIHZhbG9yZXMgbnVsb3MgcG9kZW1vcyBjb25jbHVpciBxdWU6DQogIC0tQWxsZXkgICwgUG9vbFFDLCBGZW5jZSBlIE1pc0ZlYXR1cmUgc8OjbyBtYWlzIGRlIDkwJSBkZSB2YWxvcmVzIGZhbHRhbnRlcywgc2VuZG8gbWFpcyBjb2VyZW50ZSBhIGV4Y2x1c8OjbyBkZXN0YXMgZG8gbm9zc28gYmFuY28gZGUgZGFkby4NCi0gQXMgdmFyacOhdmVpcyByZXN0YW50ZXMgcXVlIGFwcmVzZW50YW0gdmFsb3JlcyBudWxvcyBzZXLDo28gdHJhdGFkYXMgZSBvIHZhbG9yIGRhIG1vZGEgc2Vyw6EgdXNhZG8gcGFyYSBzdWJzdGl0dWlyIG9zIHZhbG9yZXMgTkFgcy4gDQoNCmBgYHtyfQ0KDQojIERlbGV0YW5kbyBhcyBjb2x1bmFzIHF1ZSBuw6NvIHPDo28gZGUgaW50ZXJlc3NlIHBhcmEgbyBkYXRhc2V0LiANCg0KdHJhaW5fcXVhbGkgPC10cmFpbl9xdWFsaSAlPiUgc2VsZWN0KC1jKEFsbGV5LCBQb29sUUMsIEZlbmNlLCBNaXNjRmVhdHVyZSkpDQpwcmludChkaW0odHJhaW5fcXVhbGkpKQ0KDQoNCg0KIyBMaXN0YSBjb20gYXMgdmFyacOhdmVpcyA6IE1hc1ZuclR5cGUsQnNtdFF1YWwsQnNtdENvbmQsQnNtdEV4cG9zdXJlLEJzbXRGaW5UeXBlMSxCc210RmluVHlwZTIsRWxlY3RyaWNhbCwNCiMgRmlyZXBsYWNlUXUsR2FyYWdlVHlwZSxHYXJhZ2VGaW5pc2gsR2FyYWdlUXVhbCxHYXJhZ2VDb25kDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIw0KIyBDb250YW5kbyBvcyB2YWxvcmVzIE5BYHMgZGFzIHZhcmnDoXZlaXMgY29tIG1pc3NpbmcgdmFsdWVzDQp1bmlxdWUodHJhaW5fcXVhbGkkTWFzVm5yVHlwZSkNCnNvcnQodGFibGUodHJhaW5fcXVhbGkkTWFzVm5yVHlwZSkpDQoNCiMgQWx0ZXJhbmRvIG9zIHZhbG9yZXMgZGUgTkEgcGFyYSBvIHZhbG9yIGRhIE1vZGEgZGUgTWFzVm5yVHlwZS4NCnRyYWluX3F1YWxpIDwtIHRyYWluX3F1YWxpICU+JSBtdXRhdGUoTWFzVm5yVHlwZT1jb2FsZXNjZShNYXNWbnJUeXBlLCJOb25lIikpDQoNCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMNCiMgQ29udGFuZG8gb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzIGNvbSBtaXNzaW5nIHZhbHVlcw0KdW5pcXVlKHRyYWluX3F1YWxpJEJzbXRRdWFsKQ0KDQpzb3J0KHRhYmxlKHRyYWluX3F1YWxpJEJzbXRRdWFsKSkNCg0KIyBBbHRlcmFuZG8gb3MgdmFsb3JlcyBkZSBOQSBwYXJhIG8gdmFsb3IgZGEgTW9kYSBkZSBCc210UXVhbC4NCnRyYWluX3F1YWxpIDwtIHRyYWluX3F1YWxpICU+JSAgICANCiAgbXV0YXRlKEJzbXRRdWFsPWNvYWxlc2NlKEJzbXRRdWFsLCJUQSIpKQ0KDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjDQoNCiMgQ29udGFuZG8gb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzIGNvbSBtaXNzaW5nIHZhbHVlcw0KdW5pcXVlKHRyYWluX3F1YWxpJEJzbXRDb25kKQ0KDQpzb3J0KHRhYmxlKHRyYWluX3F1YWxpJEJzbXRDb25kKSkNCg0KIyBBbHRlcmFuZG8gb3MgdmFsb3JlcyBkZSBOQSBwYXJhIG8gdmFsb3IgZGEgTW9kYSBkZSBCc210Q29uZA0KdHJhaW5fcXVhbGkgPC0gdHJhaW5fcXVhbGkgJT4lICAgIA0KICBtdXRhdGUoQnNtdENvbmQ9Y29hbGVzY2UoQnNtdENvbmQsIlRBIikpDQoNCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMNCg0KIyBDb250YW5kbyBvcyB2YWxvcmVzIE5BYHMgZGFzIHZhcmnDoXZlaXMgY29tIG1pc3NpbmcgdmFsdWVzDQp1bmlxdWUodHJhaW5fcXVhbGkkQnNtdEV4cG9zdXJlKQ0KDQpzb3J0KHRhYmxlKHRyYWluX3F1YWxpJEJzbXRFeHBvc3VyZSkpDQoNCiMgQWx0ZXJhbmRvIG9zIHZhbG9yZXMgZGUgTkEgcGFyYSBvIHZhbG9yIGRhIE1vZGEgZGUgQnNtdEV4cG9zdXJlDQp0cmFpbl9xdWFsaSA8LSB0cmFpbl9xdWFsaSAlPiUgICAgDQogIG11dGF0ZShCc210RXhwb3N1cmU9Y29hbGVzY2UoQnNtdEV4cG9zdXJlLCJObyIpKQ0KDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjDQoNCiMgQ29udGFuZG8gb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzIGNvbSBtaXNzaW5nIHZhbHVlcw0KdW5pcXVlKHRyYWluX3F1YWxpJEJzbXRGaW5UeXBlMSkNCg0Kc29ydCh0YWJsZSh0cmFpbl9xdWFsaSRCc210RmluVHlwZTEpKQ0KDQojIEFsdGVyYW5kbyBvcyB2YWxvcmVzIGRlIE5BIHBhcmEgbyB2YWxvciBkYSBNb2RhIGRlIEJzbXRGaW5UeXBlMQ0KdHJhaW5fcXVhbGkgPC0gdHJhaW5fcXVhbGkgJT4lICAgIA0KICBtdXRhdGUoQnNtdEZpblR5cGUxPWNvYWxlc2NlKEJzbXRGaW5UeXBlMSwiVW5mIikpDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjDQoNCiMgQ29udGFuZG8gb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzIGNvbSBtaXNzaW5nIHZhbHVlcw0KdW5pcXVlKHRyYWluX3F1YWxpJEJzbXRGaW5UeXBlMikNCg0Kc29ydCh0YWJsZSh0cmFpbl9xdWFsaSRCc210RmluVHlwZTIpKQ0KDQojIEFsdGVyYW5kbyBvcyB2YWxvcmVzIGRlIE5BIHBhcmEgbyB2YWxvciBkYSBNb2RhIGRlIEJzbXRGaW5UeXBlMg0KdHJhaW5fcXVhbGkgPC0gdHJhaW5fcXVhbGkgJT4lICAgIA0KICBtdXRhdGUoQnNtdEZpblR5cGUyPWNvYWxlc2NlKEJzbXRGaW5UeXBlMiwiVW5mIikpDQoNCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMNCg0KIyBDb250YW5kbyBvcyB2YWxvcmVzIE5BYHMgZGFzIHZhcmnDoXZlaXMgY29tIG1pc3NpbmcgdmFsdWVzDQp1bmlxdWUodHJhaW5fcXVhbGkkRWxlY3RyaWNhbCkNCg0Kc29ydCh0YWJsZSh0cmFpbl9xdWFsaSRFbGVjdHJpY2FsKSkNCg0KIyBBbHRlcmFuZG8gb3MgdmFsb3JlcyBkZSBOQSBwYXJhIG8gdmFsb3IgZGEgTW9kYSBkZSBFbGVjdHJpY2FsDQp0cmFpbl9xdWFsaSA8LSB0cmFpbl9xdWFsaSAlPiUgICAgDQogIG11dGF0ZShFbGVjdHJpY2FsPWNvYWxlc2NlKEVsZWN0cmljYWwsIlNCcmtyIikpDQoNCg0KIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIw0KDQojIENvbnRhbmRvIG9zIHZhbG9yZXMgTkFgcyBkYXMgdmFyacOhdmVpcyBjb20gbWlzc2luZyB2YWx1ZXMNCnVuaXF1ZSh0cmFpbl9xdWFsaSRGaXJlcGxhY2VRdSkNCg0Kc29ydCh0YWJsZSh0cmFpbl9xdWFsaSRGaXJlcGxhY2VRdSkpDQoNCiMgQWx0ZXJhbmRvIG9zIHZhbG9yZXMgZGUgTkEgcGFyYSBvIHZhbG9yIGRhIE1vZGEgZGUgRmlyZXBsYWNlUXUNCnRyYWluX3F1YWxpIDwtIHRyYWluX3F1YWxpICU+JSAgICANCiAgbXV0YXRlKEZpcmVwbGFjZVF1PWNvYWxlc2NlKEZpcmVwbGFjZVF1LCJHZCIpKQ0KDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjDQoNCiMgQ29udGFuZG8gb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzIGNvbSBtaXNzaW5nIHZhbHVlcw0KdW5pcXVlKHRyYWluX3F1YWxpJEdhcmFnZVR5cGUpDQoNCnNvcnQodGFibGUodHJhaW5fcXVhbGkkR2FyYWdlVHlwZSkpDQoNCiMgQWx0ZXJhbmRvIG9zIHZhbG9yZXMgZGUgTkEgcGFyYSBvIHZhbG9yIGRhIE1vZGEgZGUgR2FyYWdlVHlwZQ0KdHJhaW5fcXVhbGkgPC0gdHJhaW5fcXVhbGkgJT4lICAgIA0KICBtdXRhdGUoR2FyYWdlVHlwZT1jb2FsZXNjZShHYXJhZ2VUeXBlLCJBdHRjaGQiKSkNCg0KIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIw0KDQojIENvbnRhbmRvIG9zIHZhbG9yZXMgTkFgcyBkYXMgdmFyacOhdmVpcyBjb20gbWlzc2luZyB2YWx1ZXMNCnVuaXF1ZSh0cmFpbl9xdWFsaSRHYXJhZ2VGaW5pc2gpDQoNCnNvcnQodGFibGUodHJhaW5fcXVhbGkkR2FyYWdlRmluaXNoKSkNCg0KIyBBbHRlcmFuZG8gb3MgdmFsb3JlcyBkZSBOQSBwYXJhIG8gdmFsb3IgZGEgTW9kYSBkZSBHYXJhZ2VGaW5pc2gNCnRyYWluX3F1YWxpIDwtIHRyYWluX3F1YWxpICU+JSAgICANCiAgbXV0YXRlKEdhcmFnZUZpbmlzaD1jb2FsZXNjZShHYXJhZ2VGaW5pc2gsIlVuZiIpKQ0KDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjDQoNCiMgQ29udGFuZG8gb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzIGNvbSBtaXNzaW5nIHZhbHVlcw0KdW5pcXVlKHRyYWluX3F1YWxpJEdhcmFnZVF1YWwpDQoNCnNvcnQodGFibGUodHJhaW5fcXVhbGkkR2FyYWdlUXVhbCkpDQoNCiMgQWx0ZXJhbmRvIG9zIHZhbG9yZXMgZGUgTkEgcGFyYSBvIHZhbG9yIGRhIE1vZGEgZGUgR2FyYWdlUXVhbA0KdHJhaW5fcXVhbGkgPC0gdHJhaW5fcXVhbGkgJT4lICAgIA0KICBtdXRhdGUoR2FyYWdlUXVhbD1jb2FsZXNjZShHYXJhZ2VRdWFsLCJUQSIpKQ0KDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjDQoNCiMgQ29udGFuZG8gb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzIGNvbSBtaXNzaW5nIHZhbHVlcw0KdW5pcXVlKHRyYWluX3F1YWxpJEdhcmFnZUNvbmQpDQoNCnNvcnQodGFibGUodHJhaW5fcXVhbGkkR2FyYWdlQ29uZCkpDQoNCiMgQWx0ZXJhbmRvIG9zIHZhbG9yZXMgZGUgTkEgcGFyYSBvIHZhbG9yIGRhIE1vZGEgZGUgR2FyYWdlQ29uZA0KdHJhaW5fcXVhbGkgPC0gdHJhaW5fcXVhbGkgJT4lICAgIA0KICBtdXRhdGUoR2FyYWdlQ29uZD1jb2FsZXNjZShHYXJhZ2VDb25kLCJUQSIpKQ0KDQoNCmBgYA0KTm8gZW50YW50bywgYWx0ZXJhciBuYSBtw6NvIGNhZGEgdmFyacOhdmVsIG7Do28gw6kgYSBzb2x1w6fDo28gbWFpcyBpbnRlbGlnZW50ZSwgaW1hZ2luZSB1bSBjYXNvIGRlIHVtIGRhdGFzZXQgY29tIGNlbnRlbmFzIG91IGF0w6kgbWlsaGFyZXMgZGUgdmFyacOhdmVpcyBwYXJhIHNlcmVtIGFsdGVyYWRhcy4NCg0KTyBtZWxob3IgYSBzZXIgZmVpdG8gw6kgbG9vcCBkbyB0aXBvIGZvciwgY2FwYXogZGUgdmFycmVyIHRvZGFzIGFzIHZhcmnDoXZlaXMsIG9idGVyIG8gdmFsb3IgZGUgZnJlcXVlbmNpYSBwYXJhIGNhZGEgdmFsb3IgZSBhcm1hemVuYXIgZXN0ZSBjb20gYSBtb2RhIGUgZW0gc2VndWlkYSBzdWJzdGl0dWlyIG9zIG1pc3NpbmcgdmFsdWVzIGRlIGNhZGEgdmFyacOhdmVsLiANCg0KVmFtb3MgZW50w6NvIGNhcnJlZ2FyIG8gYmFuY28gZGUgZGFkb3Mgbm92YW1lbnRlIHBhcmEgYW51bGFyIGFzIHRyYW5zZm9ybWHDp8O1ZXMgcmVhbGl6YWRhcyBlIHNlcGFyYXIgZW0gZGFkb3MgcXVhbnRpdGF0aXZvcyBlIHF1YWxpdGF0aXZvcyBlIGVtIHNlZ3VpZGEgc29tYXIgb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzLiANCg0KYGBge3J9DQoNCiMgU29tYW5kbyBvcyB2YWxvcmVzIE5BYHMgcGFyYSBjYWRhIGNvbHVuYQ0KY29sU3Vtcyhpcy5uYSh0cmFpbl9xdWFsaSkpDQoNCiMgTW9udGFuZG8gdW0gbG9vcCBkbyB0aXBvIGZvciANCmZvcihpdGVtIGluIGNvbG5hbWVzKHRyYWluX3F1YWxpKSl7DQogIA0KICB4ID0gZGF0YS5mcmFtZShzb3J0KHRhYmxlKHRyYWluX3F1YWxpW2l0ZW1dKSkpDQogICNwcmludCh4KQ0KICBtb2RlID0gdGFpbCh4LCBuID0gMSkNCiAgI3ByaW50KG1vZGUpDQogIG1vZGEgPSAobW9kZVsxLDFdKQ0KICBtb2RhIDwtIChhcy5jaGFyYWN0ZXIobW9kYSkpDQogICNwcmludChtb2RhKQ0KICB0cmFpbl9xdWFsaSA8LSB0cmFpbl9xdWFsaSAlPiUgDQogIG11dGF0ZShhY3Jvc3MoaXRlbSwgfiBjYXNlX3doZW4oaXMubmEoLikgfiBtb2RhLCBUUlVFIH4gLikpKQ0KICANCiAgDQogIA0KfQ0KDQojIHRlc3RhbmRvIG8gcmVzdWx0YWRvDQp1bmlxdWUodHJhaW5fcXVhbGkkU3RyZWV0KQ0KdGFibGUodHJhaW5fcXVhbGkkU3RyZWV0KQ0KIyBTb21hbmRvIG9zIHZhbG9yZXMgTkFgcyBwYXJhIGNhZGEgY29sdW5hIHBhcmEgY29uZmVyaXIgb3MgcmVzdWx0YWRvcyANCmNvbFN1bXMoaXMubmEodHJhaW5fcXVhbGkpKQ0KYGBgDQoNCiMgNC4gTGltcGV6YSBkZSBkYWRvcyBxdWFudGl0YXRpdm9zDQoNCkRhIG1lc21hIG1hbmVpcmEgcXVlIHJlYWxpemFtb3MgYSBsaW1wZXphIGRvcyBkYWRvcyBmYWx0YW50ZXMgbm8gYmFuY28gZGUgZGFkb3MgcXVhbGl0YXRpdm9zIGluaWNpYW1vcyBhcXVpIGEgbGltcGV6YSBkZSBkYWRvcyBmYWx0YW50ZXMgbm9zIGRhZG9zIHF1YW50aXRhdGl2b3MNCg0KYGBge3J9DQojIFZpc3VhbGl6YW5kbyBvcyBkYWRvcyANCmdsaW1wc2UodHJhaW5fcXVhbnRpKQ0KDQojIFNvbWFuZG8gb3MgdmFsb3JlcyBmYWx0YW50ZXMNCmNvbFN1bXMoaXMubmEodHJhaW5fcXVhbnRpKSkNCg0KDQpgYGANCiAgDQpDb20gbyByZXN1bHRhZG8gZG8gc29tYXTDs3JpbyBkb3MgdmFsb3JlcyBhdXNlbnRlcyBwb3IgdmFyacOhdmVsIGRvIGJhbmNvIGRlIGRhZG9zIHF1YW50aXRhdGl2b3MsIG9ic2VydmFtb3MgcXVlIGFwZW5hcyAzIHZhcmnDoXZlaXMgcG9zc3VlbSBtaXNzaW5nIHZhbHVlcy4gU2VuZG8gYXNzaW0gYSBtZWxob3Igb3DDp8OjbyDDqSB1dGlsaXphciBhIGZ1bsOnw6NvIG11dGF0ZSgpIGUgc3Vic3RpdHVpciBvcyBOQWBzIHBlbGEgbcOpZGlhIGVudHJlIGFzIG9ic2VydmHDp8O1ZXMgcGFyYSBjYWRhIHZhcmnDoXZlbC4gDQoNCmBgYHtyfQ0KIyBMb3RGcm9udGFnZQ0KdHJhaW5fcXVhbnRpIDwtIHRyYWluX3F1YW50aSAlPiUgICAgDQogIG11dGF0ZShMb3RGcm9udGFnZT1jb2FsZXNjZShMb3RGcm9udGFnZSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1lYW4oeCA9dHJhaW5fcXVhbnRpJExvdEZyb250YWdlLCBuYS5ybSA9IFQgKSkpDQoNCiMgTWFzVm5yQXJlYQ0KdHJhaW5fcXVhbnRpIDwtIHRyYWluX3F1YW50aSAlPiUgICAgDQogIG11dGF0ZShNYXNWbnJBcmVhPWNvYWxlc2NlKE1hc1ZuckFyZWEsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKHggPXRyYWluX3F1YW50aSRNYXNWbnJBcmVhLCBuYS5ybSA9IFQgKSkpDQoNCiMgR2FyYWdlWXJCbHQNCnRyYWluX3F1YW50aSA8LSB0cmFpbl9xdWFudGkgJT4lICAgIA0KICBtdXRhdGUoR2FyYWdlWXJCbHQ9Y29hbGVzY2UoR2FyYWdlWXJCbHQsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKHggPXRyYWluX3F1YW50aSRHYXJhZ2VZckJsdCwgbmEucm0gPSBUICkpKQ0KDQojIENvbmZlcmluZG8gc2UgYWluZGEgZXhpc3RlIHZhbG9yZXMgZmFsdGFudGVzOg0KY29sU3Vtcyhpcy5uYSh0cmFpbl9xdWFudGkpKQ0KDQpgYGANCiMgNS4gVW5pbmRvIG9zIGRhdGFmcmFtZXMgcXVhbGkgZSBxdWFudGkgZGUgdHJlaW5vIA0KDQpBcMOzcyBhIGxpbXBlemEgZG9zIGRhZG9zIGRlIGZvcm1hIGFkZXF1YWRhIGRlIGFjb3JkbyBjb20gbyB0aXBvIGRlIGRhZG8sIG9uZGUgcGFyYSBvcyBkYWRvcyBjYXRlZ8Ozcmljb3MgbsOzcyB1dGlsaXphbW9zIG8gdmFsb3IgZGEgbW9kYSBwYXJhIHN1YnN0aXR1aXIgb3MgbWlzc2luZyB2YWx1ZXMgZSBvIHZhbG9yIGRhIG1lZGlhIHBhcmEgc3Vic3RpdHVpciBvcyBkYWRvcyBhdXNlbnRlIG5vcyBkYWRvcyBxdWFudGl0YXRpdm9zLCBwb2RlbW9zIGNvbSBzZWd1cmFuw6dhIHVuaXIgZXNzZSBiYW5jb3MgZGUgZGFkb3MgZW0gYXBlbmFzIHVtIGJhbmNvIGRlIGRhZG8gasOhIHRyYXRhZG8gZSBwcm9udG8gcGFyYSBhIGV0YXBhIGRlIGFuw6FsaXNlIGRlIGRhZG9zLiANCg0KYGBge3J9DQoNCiMgVW5pbmRvIG9zIGJhbmNvcyBkZSBkYWRvcw0KDQp0cmFpbi5jbGVhbiA8LSBjYmluZCh0cmFpbl9xdWFsaSx0cmFpbl9xdWFudGkpDQpuYW1lcyh0cmFpbi5jbGVhbikNCmBgYA0KDQojIDYuIExpbXBhbmRvIG9zIGRhZG9zIGRlIHRlc3RlLg0KDQpEYSBtZXNtYSBmb3JtYSBxdWUgcHJvY2VkZW1vcyBjb20gb3MgZGFkb3MgZGUgdHJlaW5vIGlyZW1vcyBhYm9yZGFyIGFnb3JhIG9zIGRhZG9zIGRlIHRlc3RlIGUgZGVpeGFyIHRvZG9zIG9zIG5vc3NvcyBiYW5jb3MgZGUgZGFkb3MgcHJvbnRvcyBwYXJhIGEgZXRhcGEgZGUgYW7DoWxpc2UgZXhwbG9yYXTDs3JpYSBkZSBkYWRvcy4gDQpBIHByaW1laXJhIGV0YXBhIHNlcsOhIGEgc2VwYXJhw6fDo28gZW0gZGFkb3MgcXVhbGl0YXRpdm9zIGUgZGFkb3MgcXVhbnRpdGF0aXZvcywgc2VndWlkbyBkYSBsaW1wZXphICBkb3MgdmFsb3JlcyBhdXNlbnRlcyBlIHBvciBmaW0gYSB1bmnDo28gZG9zIGJhbmNvcyBxdWFsaSBlIHF1YW50aSBkZSB0ZXN0ZSBlbSBhcGVuYXMgdW0gZGF0YWZyYW1lIHRyYXRhZG8uIA0KDQpgYGB7cn0NCiMgVmlzdWFsaXphbmRvIG9zIGRhZG9zIGRlIHRlc3RlDQpnbGltcHNlKHRlc3QpDQoNCiMgc2VwYXJhbmRvIG9zIGRhZG9zIGRlIHRlc3RlIGVtIHF1YW50aSBlIHF1YWxpDQoNCnRlc3RfcXVhbnRpIDwtIHNlbGVjdF9pZih0ZXN0LCBpcy5udW1lcmljKQ0KDQp0ZXN0X3F1YWxpIDwtIHNlbGVjdF9pZih0ZXN0LCBpcy5jaGFyYWN0ZXIpDQoNCiMgVmlzdWFsaXphbmRvIG8gcmVzdWx0YWRvDQoNCmdsaW1wc2UodGVzdF9xdWFsaSkNCmdsaW1wc2UodGVzdF9xdWFudGkpDQoNCiMgVHJhdGFuZG8gb3MgZGFkb3MgcXVhbGl0YXRpdm9zDQoNCmNvbFN1bXMoaXMubmEodGVzdF9xdWFsaSkpDQoNCg0KIyBPYnNlcnZlIHF1ZSB0ZW1vcyBvcyBtZXNtb3MgcHJvYmxlbWFzLCBleGlzdGVtIHZhcmnDoXZlbCBxdWUgc8OjbyBwcmF0aWNhbWVudGUgbnVsYXMsIHNlbmRvDQojIGEgbWVsaG9yIGVzY29saGEgZGVsZXRhciBlbGFzIGRvIG5vc3NvIGJhbmNvIGRlIGRhZG9zLiANCg0KdGVzdF9xdWFsaSA8LSB0ZXN0X3F1YWxpICU+JSBzZWxlY3QoLWMoQWxsZXksIFBvb2xRQywgRmVuY2UsIE1pc2NGZWF0dXJlKSkNCmdsaW1wc2UodGVzdF9xdWFsaSkNCg0KDQojIEFwbGljYW5kbyBvIG1lc21vIGxvb3AgZm9yIHBhcmEgdHJhdGFyIG9zIGRhZG8gZGUgdGVzdGUgcXVhbGl0YXRpdm9zLg0KDQpmb3IoaXRlbSBpbiBjb2xuYW1lcyh0ZXN0X3F1YWxpKSl7DQogICNwcmludChpdGVtKSANCiAgeCA9IGRhdGEuZnJhbWUoc29ydCh0YWJsZSh0ZXN0X3F1YWxpW2l0ZW1dKSkpDQogICNwcmludCh4KQ0KICBtb2RlID0gdGFpbCh4LCBuID0gMSkNCiAgI3ByaW50KG1vZGUpDQogIG1vZGEgPSAobW9kZVsxLDFdKQ0KICBtb2RhIDwtIGFzLmNoYXJhY3Rlcihtb2RhKQ0KICAjcHJpbnQodHJhaW5fcXVhbGlbaXRlbV0pDQogIHRlc3RfcXVhbGkgPC0gdGVzdF9xdWFsaSAlPiUgDQogIG11dGF0ZShhY3Jvc3MoaXRlbSwgfiBjYXNlX3doZW4oaXMubmEoLikgfiBtb2RhLCBUUlVFIH4gLikpKQ0KfQ0KDQojIENvbmZlcmluZG8gYSBzb21hIGRvcyB2YWxvcmVzIG51bG9zIG5hcyBjb2x1bmFzIGFww7NzIGEgdHJhbnNmb3Jtw6fDo28gDQp1bmlxdWUodGVzdF9xdWFsaSRNYXNWbnJUeXBlKQ0KdGFibGUodGVzdF9xdWFsaSRNYXNWbnJUeXBlKQ0KY29sU3Vtcyhpcy5uYSh0ZXN0X3F1YWxpKSkNCg0KDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIw0KDQojIFRyYXRhbmRvIG9zIGRhZG9zIHF1YW50aXRhdGl2b3MgDQoNCiMgdmlzdWFpemFuZG8gb3MgZGFkb3MNCmdsaW1wc2UodGVzdF9xdWFudGkpDQoNCiMgQ29uZmVyaW5kbyBvcyB2YWxvcmVzIGF1c2VudGVzDQoNCmNvbFN1bXMoaXMubmEodGVzdF9xdWFudGkpKQ0KDQojIEFvIHRvZG8gdGVtb3MgOCB2YXJpYXZlaXMgcXVlIGFwcmVzZW50YW0gdmFsb3JlcyBhdXNlbnRlcy4NCnRlc3RfcXVhbnRpIDwtIHRlc3RfcXVhbnRpICU+JSAgICANCiAgbXV0YXRlKExvdEZyb250YWdlPWNvYWxlc2NlKExvdEZyb250YWdlLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbWVhbih4ID10ZXN0X3F1YW50aSRMb3RGcm9udGFnZSwgbmEucm0gPSBUICkpKQ0KdGVzdF9xdWFudGkgPC0gdGVzdF9xdWFudGkgJT4lICAgIA0KICBtdXRhdGUoTWFzVm5yQXJlYT1jb2FsZXNjZShNYXNWbnJBcmVhLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbWVhbih4ID10ZXN0X3F1YW50aSRNYXNWbnJBcmVhLCBuYS5ybSA9IFQgKSkpDQp0ZXN0X3F1YW50aSA8LSB0ZXN0X3F1YW50aSAlPiUgICAgDQogIG11dGF0ZShCc210RmluU0YxID1jb2FsZXNjZShCc210RmluU0YxICwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1lYW4oeCA9dGVzdF9xdWFudGkkQnNtdEZpblNGMSAsIG5hLnJtID0gVCApKSkNCg0KdGVzdF9xdWFudGkgPC0gdGVzdF9xdWFudGkgJT4lICAgIA0KICBtdXRhdGUoQnNtdEZpblNGMiA9Y29hbGVzY2UoQnNtdEZpblNGMiAsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKHggPXRlc3RfcXVhbnRpJEJzbXRGaW5TRjIgLCBuYS5ybSA9IFQgKSkpDQoNCnRlc3RfcXVhbnRpIDwtIHRlc3RfcXVhbnRpICU+JSAgICANCiAgbXV0YXRlKEJzbXRVbmZTRiA9Y29hbGVzY2UoQnNtdFVuZlNGICwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1lYW4oeCA9dGVzdF9xdWFudGkkQnNtdFVuZlNGICwgbmEucm0gPSBUICkpKQ0KDQoNCnRlc3RfcXVhbnRpIDwtIHRlc3RfcXVhbnRpICU+JSAgICANCiAgbXV0YXRlKFRvdGFsQnNtdFNGID1jb2FsZXNjZShUb3RhbEJzbXRTRiAsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKHggPXRlc3RfcXVhbnRpJFRvdGFsQnNtdFNGICwgbmEucm0gPSBUICkpKQ0KDQp0ZXN0X3F1YW50aSA8LSB0ZXN0X3F1YW50aSAlPiUgICAgDQogIG11dGF0ZShCc210RnVsbEJhdGggPWNvYWxlc2NlKEJzbXRGdWxsQmF0aCAsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKHggPXRlc3RfcXVhbnRpJEJzbXRGdWxsQmF0aCAsIG5hLnJtID0gVCApKSkNCg0KdGVzdF9xdWFudGkgPC0gdGVzdF9xdWFudGkgJT4lICAgIA0KICBtdXRhdGUoQnNtdEhhbGZCYXRoID1jb2FsZXNjZShCc210SGFsZkJhdGggLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbWVhbih4ID10ZXN0X3F1YW50aSRCc210SGFsZkJhdGggLCBuYS5ybSA9IFQgKSkpDQoNCg0KdGVzdF9xdWFudGkgPC0gdGVzdF9xdWFudGkgJT4lICAgIA0KICBtdXRhdGUoR2FyYWdlWXJCbHQgPWNvYWxlc2NlKEdhcmFnZVlyQmx0ICwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1lYW4oeCA9dGVzdF9xdWFudGkkR2FyYWdlWXJCbHQgLCBuYS5ybSA9IFQgKSkpDQoNCg0KdGVzdF9xdWFudGkgPC0gdGVzdF9xdWFudGkgJT4lICAgIA0KICBtdXRhdGUoR2FyYWdlQ2FycyA9Y29hbGVzY2UoR2FyYWdlQ2FycyAsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKHggPXRlc3RfcXVhbnRpJEdhcmFnZUNhcnMgLCBuYS5ybSA9IFQgKSkpDQoNCg0KdGVzdF9xdWFudGkgPC0gdGVzdF9xdWFudGkgJT4lICAgIA0KICBtdXRhdGUoR2FyYWdlQXJlYSA9Y29hbGVzY2UoR2FyYWdlQXJlYSAsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKHggPXRlc3RfcXVhbnRpJEdhcmFnZUFyZWEgLCBuYS5ybSA9IFQgKSkpDQoNCmNvbFN1bXMoaXMubmEodGVzdF9xdWFudGkpKQ0KDQojIEp1bnRhbmRvIG9zIGRhdGFmcmFtZXMNCg0KdGVzdC5jbGVhbiA8LSBjYmluZCh0ZXN0X3F1YWxpLHRlc3RfcXVhbnRpKQ0KDQoNCg0KYGBgDQoNClBvciBmaW0sIGNhYmUgYWdvcmEgY29uZmVyaXIgb3MgZGFkb3MgZGUgdHJlaW5vIGUgdGVzdGUgdHJhdGFkb3MNCg0KYGBge3J9DQoNCmNvbFN1bXMoaXMubmEodHJhaW4uY2xlYW4pKQ0KY29sU3Vtcyhpcy5uYSh0ZXN0LmNsZWFuKSkNCg0KDQpgYGANCiMgNy4gQW7DoWxpc2UgRXhwbG9yYXTDs3JpYSBkZSBEYWRvcy4NCg0KTmVzdGEgZXRhcGEgaW5pY2lhcmVtb3MgdW1hIGFuYWxpc2UgZGVzY3JpdGl2YSB0YW50byBkYXMgdmFyacOhdmVpcyBxdWFsaXRhdGl2YXMgY29tbyBxdWFudGl0YXRpdmFzIGRvIGJhbmNvIGRlIGRhZG9zIGRlIHRyZWluby4gIFBhcmEgb3MgZGFkb3MgcXVhbnRpdGF0aXZvcyBhYm9yZGFyZW1vcyBlbSBwcmltZWlyYSBpbnN0YW5jaWEgdW1hIGludmVzdGlnYcOnw6NvIHBvciBlc3RhdGlzdGljYSBkZWNyaXRpdmEgdW5pdmFyaWFkYSBlIGRlcG9pcyBiaXZhcmlhZGEgcGFyYSBlbnRlbmRlciBhIHN1YSByZWxhw6fDo28gY29tIGEgdmFyaWF2ZWwgWSBkbyBwcm9ibGVtYSBxdWUgw6kgYSBTYWxlc1ByaWNlLiANCg0KIyMjIDcuMSBFc3RhdGlzdGljYSBEZXNjcml0aXZhIHBhcmEgZGFkb3MgUXVhbGl0YXRpdm9zLg0KQSBlc3RhdGlzdGljYSB1bml2YXJpYWRhIHBhcmEgIGNvbnRlbXBsYSB0YWJlbGFzIGRlIGZlcmVxdWVuY2lhIGRlIG9jb3JyZW5jaWEgbyBxdWUgasOhIHZpbW9zIGFudGVyaW9ybWVudGUgYW8gb2J0ZXJtb3MgYSBtb2RhLCByZXByZXNlbnRhw6fDo28gZ3LDoWZpY2EgZGEgZGlzdHJpYnVpw6fDo28gZSBtZWRpZGFzIGRlIGxvY2FsaXphw6fDo28sIGRpc3BlcnPDo28gb3UgdmFyaWFiaWxpZGFkZSBlIG1lZGlkYXMgZGUgZm9ybWEoYXNzaW1ldHJpYSBlIGN1cnRvc2lzKS4NCg0KQSB0YWJlbGEgZGUgZGlzdHJpYnVpw6fDo28gZGUgZnJlcXXDqm5jaWFzIMOpIGNhbGN1bGFkYSBwYXJhIGNhZGEgdmFsb3IgZGlzY3JldG8gZGEgdmFyacOhdmVsLiBBIGZyZXF1w6puY2lhIHBvZGUgc2VyIGFic29sdXRhIHF1ZSBpbmZvcm1hIG8gbsO6bWVybyBkZSBvY29ycsOqbmNpYXMgZGUgY2FkYSBlbGVtZW50byBpIG5hIGFtb3N0cmEsIHBvZGUgc2VyIHVtYSBmcmVxdcOqbmNpYSByZWxhdGl2YSBxdWUgZm9ybmVjZSBhIHBvcmNlbnRhZ2VtICUgcmVsYXRpdmEgw6AgZnJlcXXDqm5jaWEgYWJzb2x1dGEsIHBvZGUgc2VyIHVtYSBmcmVxdcOqbmNpYSBhY3VtdWxhZGEgcXVlIHJlcHJlc2VudGEgYSBzb21hIGRlIHRvZG9zIG9zIGVsZW1lbnRvcyBlIHBvciBmaW0gcG9kZSBzZXIgdW1hIGZyZXF1w6puY2lhIHJlbGF0aXZhIGFjdW11bGFkYSBxdWUgw6kgYSBmcmVxdWVuY2lhIHJlbGF0aXZhIMOgIGFjdW11bGFkYS4gDQoNCmBgYHtyfQ0KDQojIENhcnJlZ2FuZG8gb3MgcGFjb3Rlcy4gDQoNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkocGxvdGx5KQ0KDQoNCiMgQ2FsY3VsbyBkYXMgZnJlcXVlbmNpYXMgYWJzb2x1dGFzIA0KDQp0YWJlbGEuZnJlcS5NU1pvbmluZyA8LSBhcy5kYXRhLmZyYW1lKHNvcnQodGFibGUodHJhaW5fcXVhbGkkTVNab25pbmcpKSkNCmNvbHVuYXMgPC0gYyAoIk1TWm9uaW5nIiwgIkZyZXEiKQ0KbmFtZXModGFiZWxhLmZyZXEuTVNab25pbmcpIDwtIGNvbHVuYXMNCg0KIyBCYXJyYXMgdmVydGljaWFzIA0KZ2dwbG90bHkoDQpnZ3Bsb3QoZGF0YSA9IHRhYmVsYS5mcmVxLk1TWm9uaW5nLCBhZXMoeCA9IE1TWm9uaW5nLCB5ID0gRnJlcSkpKw0KICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IiwgZmlsbCA9ICJibHVlIiwgY29sb3IgPSAiYmxhY2siKSsNCiAgICB0aGVtZV9jbGFzc2ljKGJhc2Vfc2l6ZSA9IDE4KSANCikNCg0KDQojIEJhcnJhcyBlbXBpbGhhZGFzDQpnZ3Bsb3RseSgNCnRhYmVsYS5mcmVxLk1TWm9uaW5nICU+JSANCiAgZ2dwbG90KGFlcyh4ID0gIiIsIHkgPSBGcmVxLCBmaWxsID0gTVNab25pbmcsIGxhYmVsID0gRnJlcSkpKw0KICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5Iiwgd2lkdGggPSAwLjMpICsgY29vcmRfZmxpcCgpICsgDQogIGdlb21fbGFiZWwocG9zaXRpb24gPSBwb3NpdGlvbl9zdGFjayh2anVzdCA9IDAuNSkpKw0KICB0aGVtZV92b2lkKCkNCikNCg0KDQpgYGANCg0KRGVzdGEgbWFuZWlyYSBjcmlhbW9zIHVtYSBmb3JtYSBlZmljaWVudGUgZGUgZXhwbG9yYW1vcyBvIGJhbmNvIGRlIGRhZG9zIHBhcmEgYXMgdmFyacOhdmVpcyBjYXRlZ8OzcmljYXMuIA0KQ29tbyB0ZW1vcyAzOSB2YXJpw6F2ZWlzIGV4cGxvcmF0w7NyaWFzIG8gbWVsaG9yIGEgZmF6ZXIgw6kgY3JpYXIgdW0gZnVuw6fDo28gcXVlIG5vcyByZXRvcm5lIGVzc2FzIHRyw6pzIGluZm9ybWHDp8O1ZXMsIGEgdGFiZWxhIGRlIGZyZXF1w6puY2lhLCBvIGdyYWZpY28gZGUgYmFycmFzIHZlcnRpY2FpcyBlIGdyYWZpY28gZGUgYmFycmFzIGVtcGlsaGFkYXMuIA0KDQpFbnTDo28gdmFtb3MgY29sb2NhciBhIG3Do28gbmEgbWFzc2EhIQ0KDQoNCmBgYHtyfQ0KYW5hbGlzZV9leHBsb19jYXRlZ29yaWNhIDwtIGZ1bmN0aW9uKHNlcmllKXsNCiAgDQogICMgQ2FsY3VsbyBkYXMgZnJlcXVlbmNpYXMgYWJzb2x1dGFzIA0KDQogIHRhYmVsYS5mcmVxIDwtIGFzLmRhdGEuZnJhbWUoc29ydCh0YWJsZShzZXJpZSkpKQ0KICBjb2x1bmFzIDwtIGMgKCJWYXJpYXZlbCIsICJGcmVxIikNCiAgbmFtZXModGFiZWxhLmZyZXEpIDwtIGNvbHVuYXMNCiAgDQogICNHcmFmaWNvIGRlIGJhcnJhcyB2ZXJ0aWNhaXMNCiAgZzEgPC0gZ2dwbG90bHkoDQogIGdncGxvdChkYXRhID0gdGFiZWxhLmZyZXEsIGFlcyh4ID0gVmFyaWF2ZWwsIHkgPSBGcmVxKSkrDQogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBmaWxsID0gImJsdWUiLCBjb2xvciA9ICJibGFjayIpKw0KICB0aGVtZV9jbGFzc2ljKGJhc2Vfc2l6ZSA9IDE4KSANCikNCiAgDQogICMgQmFycmFzIGVtcGlsaGFkYXMNCiAgZzIgPC0gZ2dwbG90bHkoDQogIHRhYmVsYS5mcmVxICU+JSANCiAgZ2dwbG90KGFlcyh4ID0gIiIsIHkgPSBGcmVxLCBmaWxsID0gVmFyaWF2ZWwsIGxhYmVsID0gRnJlcSkpKw0KICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5Iiwgd2lkdGggPSAwLjMpICsgY29vcmRfZmxpcCgpICsgDQogIGdlb21fbGFiZWwocG9zaXRpb24gPSBwb3NpdGlvbl9zdGFjayh2anVzdCA9IDAuNSkpKw0KICB0aGVtZV92b2lkKCkNCikNCiAgcmV0b3Jub3MgPC0gbGlzdCh0YWJlbGEuZnJlcSwgZzEsIGcyKQ0KDQogIHJldHVybihyZXRvcm5vcykNCiAgDQp9DQoNCnRyYWluX3F1YWxpJEhvdXNlU3R5bGUNCnJlc3VsdGFkb19NU1pvbmluZyA8LSBhbmFsaXNlX2V4cGxvX2NhdGVnb3JpY2Eoc2VyaWUgPSAgdHJhaW5fcXVhbGkkTVNab25pbmcpDQoNCnJlc3VsdGFkb19NU1pvbmluZ1tbM11dDQoNCnJlc3VsdGFkb19TdHJlZXQgPC0gYW5hbGlzZV9leHBsb19jYXRlZ29yaWNhKHNlcmllID0gdHJhaW5fcXVhbGkkU3RyZWV0KQ0KDQpyZXN1bHRhZG9fSG91c2VTdHlsZSA8LSBhbmFsaXNlX2V4cGxvX2NhdGVnb3JpY2Eoc2VyaWU9dHJhaW5fcXVhbGkkSG91c2VTdHlsZSkNCnJlc3VsdGFkb19Ib3VzZVN0eWxlDQoNCmBgYA0KDQoNCg==